[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[EP-tech] Re: RoMEO autocomplete



On 10/04/14 18:05, Gilles Fournié wrote:
> Hi,
>
> We would like to add an autocompletion for the journal fields.
>
> As explained in the Wiki page
> http://wiki.eprints.org/w/Autocompletion_and_Authority_Files_%28Romeo_Autocomplete%29
> we have downloaded and used the file found at
> http://romeo.eprints.org/romeo_journals.autocomplete.
>
> The solution works well.
>
> But we realized that the file romeo_journals.autocomplete is old. Its
> date is Jan 19, 2009. Of course, we checked its content and we noticed
> that journals we can find on the Sherpa/RoMEO website are not in the
> file. So, it seems it has not been updated for a while.
>
> Does anybody know if there is a plan to update it ? Or if we can find
> another one elsewhere ?

I have two files for you, which relate to the wiki page 
http://wiki.eprints.org/w/Adding_an_Auto-Completer_to_a_non-workflow_page

update_romeo_publishers lives in eprints/~~/bin and should be run daily 
- it creates a set of lookup files

get_journals lives in eprints/~~/cgi and is the bit that does the AJAX 
response stuff, using the files created above

-- 

Ian Stuart.
Developer: ORI, RJ-Broker, and OpenDepot.org
Bibliographics and Multimedia Service Delivery team,
EDINA,
The University of Edinburgh.

http://edina.ac.uk/

This email was sent via the University of Edinburgh.

The University of Edinburgh is a charitable body, registered in
Scotland, with registration number SC005336.

-------------- next part --------------
use strict;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use File::Slurp;
use EPrints;

# Directory holding map.txt and the per-publisher policy fragments
# (<id>.xhtml) that this script reads.
my $varpath = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
my $mapfile = "$varpath/map.txt";

# Publisher name => publisher id, loaded from map.txt.  Every useful line is
# expected to look like "<digits> <publisher name>".
my $pub_map = {};
open( my $map_fh, '<', $mapfile ) || die "can't read mapfile: $mapfile: $!";
while( my $line = <$map_fh> )
{
	chomp $line;
	# Ignore lines that do not match: an unchecked match would otherwise
	# re-use the captures left behind by an earlier line.
	next unless $line =~ m/^(\d+) (.*)/;
	$pub_map->{$2} = $1;
}
close $map_fh;



#########################
# Some global variables #
#########################
# Journal title => HTML <li> fragment; filled by process_journal and read by
# get_journals.
my $journal_data = {};

#######################
# Various subroutines #
#######################
# Decode a URL-encoded string.
#   $url    - the encoded text
#   returns - the text with every '+' turned into a space and every %XX
#             escape (either hex case) replaced by the byte it encodes
sub urldecode{
  my ($url) = @_;
  # Every '+' is translated, not just the first one, and this happens before
  # the %XX escapes are expanded so that a literal plus sent as %2B survives.
  $url =~ tr/+/ /;
  $url =~ s/%([0-9a-f][0-9a-f])/pack("C",hex($1))/egi;
  return $url;
}

# XML::Twig handler for one <journal> element of the RoMEO response.
#   $twig    - the XML::Twig object (unused)
#   $journal - the <journal> element being handled
# Builds the autocompleter <li> entry for the journal and stores it in
# $journal_data under the journal title.  Entries without a title are
# skipped.  Always returns 1.
sub process_journal {
  my ( $twig, $journal ) = @_;

  # URL-decoded text of an optional child element, '' when it is absent.
  my $field = sub {
    my ($tag) = @_;
    my $child = $journal->first_child($tag);
    return '' unless $child;
    my $text = urldecode( $child->text );
    return defined $text ? $text : '';
  };

  # get the components
  my $title = $field->('jtitle');
  return 1 if $title eq '';   # nothing to offer without a title

  my $zetoc = $field->('zetocpub');
  my $romeo = $field->('romeopub');
  my $issn  = $field->('issn');

  # Prefer the RoMEO publisher name and fall back to the Zetoc one.  (The
  # earlier "not $publisher && $zetoc" test parsed as
  # "not ($publisher && $zetoc)" and dropped the RoMEO name whenever no Zetoc
  # name was present.)
  my $publisher = $romeo ne '' ? $romeo : $zetoc;

  # Default policy block, used when no policy file is known for the publisher
  my $conditions = qq(<div class='romeo_message'><div class='romeo_yellow_content'><table width="100%"><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'></td><td><div class='publishers'><div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div> <p>The publishers conditions are not defined.</p></div></td></tr></table></div></div>);
  if( $publisher ne '' && defined $pub_map->{$publisher} )
  {
    my $fn = $varpath."/".$pub_map->{$publisher}.".xhtml";
    # Keep the default block when the policy file is not readable rather
    # than letting read_file abort the whole request.
    $conditions = read_file($fn) if -r $fn;
  }

  # build a lump of html based on the components
  my $html = "<li>$title";
  $html .= "<br />published by $publisher" if $publisher ne '';

  $html .= "<ul>";
  $html .= "<li id='for:value:component:_publication'>$title</li>";
  $html .= "<li id='for:value:component:_publisher'>$publisher</li>";
  $html .= "<li id='for:value:component:_issn'>$issn</li>";
  $html .= "<li id='for:block:absolute:publisher_policy'>$conditions</li>";
  $html .= "</ul></li>\n";
  # save the html
  $journal_data->{$title} = $html;

  return 1;
} ## end process_journal

# Build the autocompleter response for a (partial) journal title.
#   $journal - the text typed so far
#   returns  - an HTML fragment: a comment when no title was supplied, when
#              the RoMEO lookup failed or when nothing matched; a "keep
#              typing" list for fewer than three characters; otherwise a
#              <ul class='journals'> holding one entry per matching journal,
#              sorted by title
sub get_journals {
  my $journal = shift;
  my @html = ();

  if (!$journal) 
  {
    return "<!-- No journal name supplied -->\n";
  }

  return ("<ul><li>keep typing....</li></ul>") if (length($journal) < 3);

  # Forget anything collected by an earlier lookup in this process
  %{$journal_data} = ();

  # Percent-encode everything except ASCII letters and digits
  $journal =~ s/([^a-z0-9])/sprintf("%%%02X",ord($1))/ige;
  my $query = "http://www.sherpa.ac.uk/romeo/api29.php?qtype=starts&jtitle=$journal&ak=hC0DitNXMJA";

  my $request = HTTP::Request->new( GET => "$query" );

  my $ua = LWP::UserAgent->new();
  my $response = $ua->request($request);
  if( !$response->is_success )
  {
    # Do not feed an error page to the XML parser
    warn "RoMEO lookup failed: ".$response->status_line."\n";
    return "<!-- RoMEO lookup failed -->";
  }
  my $content = $response->content();

  my $twig = XML::Twig->new(
                     'keep_encoding' => 1,
                     'TwigRoots' => { 'journals' => 1 },
                     'TwigHandlers' => { 'journal' => \&process_journal, }
                    );
  # parse() dies on malformed XML; trap that so the caller still gets a reply
  my $parsed = eval { $twig->parse($content); 1 };
  if( !$parsed )
  {
    warn "RoMEO response could not be parsed: $@";
    return "<!-- RoMEO lookup failed -->";
  }

  if (!scalar keys %{$journal_data}) 
  {
    push @html, "<!-- no matches -->";
    return (join "\n", @html)
  }

  push @html, "<ul class='journals'>\n";
  foreach my $title (sort keys %{$journal_data}) 
  {
    push @html, "$journal_data->{$title}\n";
  } ## end of  foreach my $title (sort keys %{$journal_data})
  push @html, "</ul>\n";
  return (join "\n", @html)
} ## end get_journals

my $session = EPrints::Session->new();

# The response opens with an XML declaration followed by a blank line ...
print qq(<?xml version="1.0" encoding="UTF-8" ?>\n\n);

# ... and continues with the HTML fragment for the autocompleter, built from
# the lower-cased "q" request parameter.
my $q = lc $session->param( "q" );
print get_journals( $q );
#print STDERR get_journals( $q );

$session->terminate;

-------------- next part --------------
#!/home/cpan/bin/perl -w -I/home/oarj/eprints/perl_lib

use strict;
use utf8;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use EPrints::SystemSettings;

use Data::Dumper;

# Publisher name => HTML policy block; reset by get_publisher before each
# parse and filled by process_publisher.
my $publisher_data = {};
# Publisher name => RoMEO publisher id; filled by process_publisher and
# written to map.txt at the end of the run.
my $pub_map = {};

# Ask the RoMEO API for its complete publisher list and extract the ids.
# Returns the list of numeric publisher ids found in the response, after
# reporting their count on STDERR; dies when fewer than 100 were found.
sub get_romeo_pub_ids
{
  my $url = "http://www.sherpa.ac.uk/romeo/api.php?all=yes&ak=<your_key>";

  my $agent = LWP::UserAgent->new();
  my $reply = $agent->request( HTTP::Request->new( GET => $url ) );

  # Take the first id="NNN" attribute found on each line of the response
  my @ids = map { m/id="([0-9]+)"/ ? $1 : () } split /\n/, $reply->content();

  warn( "pubids: ".scalar(@ids)."\n" );
  die "urk, not enough pubids" if scalar(@ids) < 100;
  return @ids;
}



# Human-readable sentence for each archiving permission, indexed first by
# the kind of print ('pre' or 'post') and then by the permission value.
# Both kinds share the same wording apart from the "preprints"/"postprints"
# noun, so the table is generated from one template.
my %depositing = map {
      my $kind = $_;
      ( $kind => {
          can        => "It permits archiving of ${kind}prints",
          cannot     => "It prohibits archiving of ${kind}prints",
          restricted => "It permits OA archiving of ${kind}prints subject to restrictions (see below)",
          unclear    => "Its policy on OA archiving of ${kind}prints is unclear.  Please check the publisher policy (see link below)",
      } );
} qw(pre post);
##<strong>pre-print</strong>
#######################
# Various subroutines #
#######################
# Render the archiving policy for one kind of print as an HTML <dl>.
#   $which      - 'pre' or 'post'; selects the "<which>archiving" and
#                 "<which>restrictions" children and the %depositing row
#   $print_twig - the element holding those children; may be undef
#   returns     - the HTML, or '' when the element or its permission is
#                 missing (never undef, so callers can concatenate safely)
sub process_prints {
  my ($which, $print_twig) = @_;

  my $text = '';
  return $text unless $print_twig;

  my $archiving  = $print_twig->first_child("${which}archiving");
  my $permission = $archiving ? $archiving->text : '';
  return $text unless $permission;

  # Plain conditional instead of "my @x = ... if ...", whose behaviour is
  # undefined when the condition is false.
  my $restricted   = $print_twig->first_child("${which}restrictions");
  my @restrictions = $restricted ? $restricted->children : ();

  # A permission value with no entry in %depositing yields an empty <dt>
  my $summary = $depositing{$which}{$permission};
  $summary = '' unless defined $summary;

  $text = "<dl><dt>".$summary."</dt>\n";
  if (scalar @restrictions) {
    $text .= "<dd>The publisher defines the following restriction:\n<ul>\n";
    foreach my $restriction (@restrictions) {
      $text .= "<li>".$restriction->text."</li>\n";
    } ## end of foreach restriction
    $text .= "</ul>\n</dd>\n";
  } ## end of if scalar restrictions
  $text .= "</dl>\n";

  return $text;
}

# XML::Twig handler for one <publisher> element of the RoMEO response.
#   $twig      - the XML::Twig object (unused)
#   $publisher - the <publisher> element being handled
# Records "publisher name => id" in $pub_map and stores the HTML policy block
# for the publisher in $publisher_data under its name.  Always returns 1.
sub process_publisher {
  my ( $twig, $publisher ) = @_;

  # Text of an optional child element, undef when the element is absent.
  my $child_text = sub {
    my ($elt, $tag) = @_;
    my $child = $elt->first_child($tag);
    return $child ? $child->text : undef;
  };
  # Children of an optional container element, empty list when it is absent.
  my $child_list = sub {
    my ($elt, $tag) = @_;
    my $child = $elt->first_child($tag);
    return $child ? $child->children : ();
  };

  # get the components
  my $name  = $publisher->first_child('name')->text;
  my $pubid = $publisher->att('id');
  $pub_map->{$name} = $pubid;

  my $homeurl     = $child_text->($publisher, 'homeurl');
  my $romeocolour = $child_text->($publisher, 'romeocolour');
  $romeocolour = '' unless defined $romeocolour;
  my $copyright   = $child_text->($publisher, 'copyright');

  my @conditions = $child_list->($publisher, 'conditions');
  my @mandates   = $child_list->($publisher, 'mandates');

  # build a lump of html based on the data returned.
  my $html = "<div class='romeo_message'>";
  if ($romeocolour eq 'green') {
    # class was misspelt 'romeno_message_icon' here, unlike the other branches
    $html .= "<div class='romeo_green_content'><table style='width:100%; border:1px solid blue;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/good.png' alt='Archiving of pre- and post-prints is permitted'>";
  } elsif ($romeocolour eq 'red') {
    $html .= "<div class='romeo_orange_content'><table style='width:100%; border:1px solid red;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/warning.png' alt='Archiving of pre- and post-prints is not permitted'>";
  } else {
    $html .= "<div class='romeo_yellow_content'><table style='width:100%; border:1px solid yellow;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'>";
  }
  $html .= "</td><td><div class='publishers'>";
  $html .= "<div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div>\n";
  $html .= "<p>This journal is published by ";
  if ($homeurl) {
    $html .= "<a href='$homeurl' title='Link to the publishers home page. NOTE: this will open a new window.' target='_new'>$name</a>.";
  } else {
    $html .= $name."."
  }
  $html .= "<br />\nAccording to the Sherpa/Romeo database, the following conditions apply:</p><dl>";

  # One <dd> per kind of print; a missing <preprints>/<postprints> element
  # or an undefined result simply gives an empty <dd>.
  foreach my $which ('pre', 'post') {
    my $prints = $publisher->first_child("${which}prints");
    my $policy = $prints ? process_prints($which, $prints) : undef;
    $policy = '' unless defined $policy;
    $html .= "<dd>".$policy."</dd>\n";
  }
  $html .= "</dl>\n";

  # if we have any general conditions, we need to add them to the data-set
  if (scalar @conditions) {
    $html .= "<p>The publisher also defines the following general conditions</p>\n<dl>\n";
    foreach my $condition (@conditions) {
      $html .= "<dd>".$condition->text."</dd>\n";
    } ## end of foreach condition
    $html .= "</dl>\n";
  } ## end of if conditions

  if (scalar @mandates) {
    $html .= "<p>Juliet has records on the following mandates:</p>\n<dl>\n";
    foreach my $mandate (@mandates) {
      my $funder = $mandate->first_child('funder');
      next unless $funder;   # nothing to link to

      my $julieturl = $child_text->($funder, 'julieturl');
      $julieturl = '' unless defined $julieturl;
      my $fundername = $child_text->($funder, 'fundername');
      $fundername = '' unless defined $fundername;

      my $link = "<a title='Opens new window.' href='$julieturl' onclick=\"dialog = window.open('$julieturl','dialogwindow','directories=no,menubar=no,scrollbars=yes,taskbar=no,resizable=yes,location=no,status=no,toolbar=no;');dialog.focus(); return false\">".$fundername."</a>";
      # The acronym, when present, follows the link in brackets (a stray
      # apostrophe used to be printed after the funder name in this case).
      my $acronym = $child_text->($funder, 'funderacronym');
      $link .= " (".$acronym.")" if defined $acronym;

      $html .= "<dd>".$link."</dd>\n";
    }
    $html .= "</dl>\n";
  }

  # The copyright text is un-escaped but currently not shown: the paragraph
  # that used it is commented out.
  if (defined $copyright) {
    $copyright =~ s/&lt;/</g;
    $copyright =~ s/&gt;/>/g;
  }
#  $html .= "<p>The publisher has given $copyright for their copyright references.</p>\n";
  $html .= "</div>\n</td></tr></table>\n</div></div>";
  # save the html
  $publisher_data->{$name} = $html;

  return 1;
} ## end process_publisher


# Fetch the RoMEO record for one publisher and render it as HTML.
#   $pubid  - RoMEO publisher id
#   returns - the HTML blocks built by process_publisher for that id, joined
#             with newlines; a "No pubid" comment when no id was given; ''
#             when the request or the parse failed or produced no publisher
sub get_publisher {
  my $pubid = shift;

  return "<!-- No pubid name supplied -->\n" unless $pubid;

  my $query = "http://www.sherpa.ac.uk/romeo/api29.php?id=$pubid&ak=hC0DitNXMJA";

  my $request = HTTP::Request->new( GET => "$query" );

  my $ua = LWP::UserAgent->new();
  my $response = $ua->request($request);
  if( !$response->is_success )
  {
    # Report and carry on: one failed lookup should not end the whole run
    warn "RoMEO request for publisher $pubid failed: ".$response->status_line."\n";
    return '';
  }
  my $content = $response->content();

  # process_publisher fills this afresh for every request
  $publisher_data = {};
  my $twig = XML::Twig->new(
                     'keep_encoding' => 1,
                     'TwigRoots' => { 'publishers' => 1 },
                     'TwigHandlers' => { 'publisher' => \&process_publisher, }
                    );
  # parse() dies on malformed XML; trap it for the same reason as above
  my $parsed = eval { $twig->parse($content); 1 };
  if( !$parsed )
  {
    warn "RoMEO response for publisher $pubid could not be parsed: $@";
    return '';
  }

  my @html = ();
  foreach my $name (sort keys %{$publisher_data}) {
    push @html, "$publisher_data->{$name}\n";
  } ## end of  foreach my $name (sort keys %{$publisher_data})

  return (join "\n", @html)

} ## end get_publisher

# Write one <id>.xhtml policy file per publisher, then the map.txt index
# ("<id> <publisher name>" per line) built from $pub_map.
my $path = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
foreach my $pub_id (  get_romeo_pub_ids() )
{
  # Fetch before opening: opening for write truncates the file, so a failed
  # lookup would otherwise wipe the policy generated by an earlier run.
  my $pubinfo = get_publisher($pub_id);
  if( !defined $pubinfo || $pubinfo eq '' )
  {
    warn "no data for publisher $pub_id, leaving any existing file alone\n";
    next;
  }

  my $pub_file = "$path/$pub_id.xhtml";
  open( my $pub_fh, '>', $pub_file ) || die "failed to write $pub_file: $!";
  print {$pub_fh} $pubinfo;
  # Buffered write errors only surface when the handle is closed
  close( $pub_fh ) || die "failed to write $pub_file: $!";
}
use Data::Dumper;
my $map_file = "$path/map.txt";
open( my $map_fh, '>', $map_file ) || die "failed to write $map_file: $!";
foreach my $name ( sort keys %$pub_map )
{
	my $key = lc $pub_map->{$name};
	print {$map_fh} "$key $name\n";
}
close( $map_fh ) || die "failed to write $map_file: $!";