#!/usr/bin/perl -w -I/opt/eprints2/perl_lib

######################################################################
#
#  Attempt to import eprints 1 data into eprints 2.
#
#  This script is not an official part of the EPrints release but
#  is still distributed under the terms of the GNU Public License.
#
######################################################################

use strict;

use EPrints::Session;
use EPrints::ImportXML;

my $session = EPrints::Session->new( 1, "cogprints", 2 );
exit( 1 ) unless defined $session;

# NOTE: everything the subs below share must be initialised here, before
# the import passes are called -- the script exits before control would
# ever reach a file-level assignment placed further down.

# The location of the XML files dumped from the v1 system.
my $xmlbase = "/opt/eprints2/local/cogprints/eprintsdump";

# The directory containing disk0/ and any other document dirs
# from the old system.
my $datapath = "/opt/eprints2/local/cogprints/";

# user to use if the correct user is not found:
my $adminid = 'cjg';

# State shared between the import passes. $notes->{usermap} maps a v1
# username to the userid of the v2 user record created for it.
my $notes = {};

# The passes run in this order; import_users fills the user map that
# the eprint and subscription passes read.
import_subjects( $session );
import_users( $session );
import_eprints( $session );
import_subscriptions( $session );
import_documents( $session );

$session->terminate();
exit;


######################################################################
# import_subjects( $session )
#
# Imports subjects.xml into the "subject" dataset via dealsubject,
# reports how many were imported, creates the top level "subjects"
# node and finally reindexes the dataset.
######################################################################

sub import_subjects
{
    my( $session ) = @_;

    my $subds = $session->get_archive()->get_dataset( "subject" );

    my $filename = $xmlbase."/subjects.xml";
    print "Importing from $filename...\n";

    # dealsubject increments $info->{count} for each subject it handles.
    my $info = { count=>0 };
    EPrints::ImportXML::import_file(
        $session,
        $filename,
        \&dealsubject,
        $subds,
        $info );

    my $count = $info->{count};
    print "Done importing $count subjects from $filename\n";

    # Subjects whose v1 parent was "ROOT" were attached to "subjects"
    # by dealsubject; that node itself is created here.
    print "Adding top level subject.\n";
    EPrints::Subject::create(
        $session,
        "subjects",
        { en=>"Subject Areas" },
        [ "ROOT" ],
        0 );

    print "Reindexing subject dataset to set ancestor data\n";
    $subds->reindex( $session );
    print "Done reindexing\n";
}

######################################################################
# dealsubject( $session, $dataset, $subject, $info )
#
# Callback passed to import_file by import_subjects (presumably called
# once per record in the file). Creates the v2 subject, re-parenting
# anything that hung off "ROOT" onto "subjects", and bumps
# $info->{count}.
######################################################################

sub dealsubject
{
    my( $session, $dataset, $subject, $info ) = @_;

    print "Importing: ".$subject->get_value( "subjectid" )."\n";

    my $parent = $subject->get_data->{"parent"};
    if( defined $parent && $parent eq "ROOT" )
    {
        $parent = "subjects";
    }

    EPrints::Subject::create(
        $session,
        $subject->get_value( "subjectid" ),
        $subject->get_value( "name" ),
        [ $parent ],
        $subject->get_value( "depositable" ) eq "TRUE" );

    $info->{count}+=1;
}


##############################

######################################################################
# import_users( $session )
#
# Resets the username -> userid map and imports users.xml into the
# "user" dataset via dealuser.
######################################################################

sub import_users
{
    my( $session ) = @_;

    $notes->{usermap} = {};

    my $ds = $session->get_archive()->get_dataset( "user" );

    my $filename = $xmlbase."/users.xml";
    print "Importing from $filename...\n";

    my $info = { count=>0 };
    EPrints::ImportXML::import_file(
        $session,
        $filename,
        \&dealuser,
        $ds,
        $info );

    print "done users.\n";
}

######################################################################
# dealuser( $session, $dataset, $item, $info )
#
# Callback passed to import_file by import_users. Creates a v2 user
# ("admin" when the v1 group is "Staff", otherwise "user"), translates
# the v1 "os" value, stores a crypted password, records the new userid
# in the user map and copies every imported field onto the new user
# before committing it.
######################################################################

sub dealuser
{
    my( $session, $dataset, $item, $info ) = @_;

    print "Importing: ".$item->get_value( "username" )."\n";

    my $group = $item->{data}->{groups};
    my $type = "user";
    if( defined $group && $group eq "Staff" )
    {
        $type = "admin";
    }
    my $user = EPrints::User::create_user( $session, $type );

    my $data = $item->get_data;

    # v1 "os" values and the value stored for each in v2.
    my %osmap = (
        "Unspecified" => "",
        "unspec"      => "",
        "Windows/DOS" => "win",
        "win9x"       => "win",
        "UNIX"        => "unix",
        "VMS"         => "vms",
        "Macintosh"   => "mac",
        "Other"       => "other" );

    # Values not listed in the map are passed through unchanged.
    $data->{os} = "" if !defined $data->{os};
    if( defined $osmap{$data->{os}} )
    {
        $data->{os} = $osmap{$data->{os}};
    }

    $data->{password} = EPrints::Utils::crypt_password( $data->{passwd} );

    # Remember the new id before the imported fields are copied over
    # the freshly created record.
    $notes->{usermap}->{$data->{username}} = $user->{data}->{userid};

    foreach my $field ( keys %{$data} )
    {
        $user->{data}->{$field} = $data->{$field};
    }

    $user->commit;
    $info->{count}+=1;
}


#########################################################

######################################################################
# import_eprints( $session )
#
# Imports archive.xml, inbox.xml, buffer.xml and deletions.xml into
# the archive, inbox, buffer and deletion datasets via dealeprint,
# then moves the "eprintid" counter past the highest id imported.
######################################################################

sub import_eprints
{
    my( $session ) = @_;

    # dealeprint keeps the highest eprintid seen in $info->{maxid}.
    my $info = { maxid=>0 };

    # A named loop variable is used because import_file runs foreign
    # code while the loop value is still needed afterwards.
    foreach my $dsid ( "archive", "inbox", "buffer", "deletion" )
    {
        my $ds = $session->get_archive()->get_dataset( $dsid );

        # The dump file for the "deletion" dataset is deletions.xml.
        my $file = ( $dsid eq "deletion" ) ? "deletions" : $dsid;

        my $filename = $xmlbase."/$file.xml";
        print "Importing from $filename...\n";
        EPrints::ImportXML::import_file(
            $session,
            $filename,
            \&dealeprint,
            $ds,
            $info );
        print "done $dsid.\n";
    }

    my $sql = "update counters set counter=".($info->{maxid}+1)." where countername='eprintid'";
    $session->get_db->do( $sql );

    print "done eprints.\n";
}

######################################################################
# dealeprint( $session, $dataset, $item, $info )
#
# Callback passed to import_file by import_eprints. Reduces the v1 id
# fields to their trailing number, fills in the owner, creates the
# eprint directory, adds the record to $dataset and tracks the highest
# eprintid in $info->{maxid}.
#
# A record whose eprintid is set but has no trailing number is skipped
# with a warning; a link field (succeeds, commentary, replacement) in
# that state is cleared with a warning.
######################################################################

sub dealeprint
{
    my( $session, $dataset, $item, $info ) = @_;

    print "Importing: ".$item->get_value( "eprintid" )."\n";

    # don't want to mangle the original data.
    my $data = $item->get_data;

    foreach my $field ( "eprintid", "succeeds", "commentary", "replacement" )
    {
        next unless EPrints::Utils::is_set( $data->{$field} );

        # Capture in list context: after a failed match $1 would still
        # hold whatever an earlier successful match left in it.
        my( $number ) = ( $data->{$field} =~ m/(\d+)$/ );
        if( defined $number )
        {
            $data->{$field} = $number+0;
            next;
        }

        if( $field eq "eprintid" )
        {
            warn "Skipping eprint: no trailing number in eprintid '$data->{$field}'\n";
            return;
        }
        warn "Clearing $field: no trailing number in '$data->{$field}'\n";
        $data->{$field} = undef;
    }

    if( defined $data->{month} && $data->{month} eq "unspec" )
    {
        $data->{month} = '';
    }

    _assign_owner( $data );

    if( defined $data->{deletiondate} )
    {
        $data->{datestamp} = $data->{deletiondate};
    }
    $data->{replacedby} = $data->{replacement};

    $data->{dir} = EPrints::EPrint::_create_directory(
        $session,
        $data->{eprintid} );

    $session->get_db()->add_record( $dataset, $data );

    if( $data->{eprintid} > $info->{maxid} )
    {
        $info->{maxid} = $data->{eprintid};
        print "NEW MAX!\n";
    }
}

######################################################################
# _assign_owner( $data )
#
# Sets $data->{userid} from the username -> userid map built by
# dealuser. The admin user ($adminid) is used when the record has no
# username and also when its username has no entry in the map.
######################################################################

sub _assign_owner
{
    my( $data ) = @_;

    $data->{username} = $adminid unless( defined $data->{username} );

    my $username = $data->{username};
    my $userid = $notes->{usermap}->{$username};
    if( !defined $userid && $username ne $adminid )
    {
        warn "No imported user '$username'; assigning record to '$adminid'\n";
        $userid = $notes->{usermap}->{$adminid};
    }

    $data->{userid} = $userid;
}


##########################

######################################################################
# import_subscriptions( $session )
#
# Imports subscriptions.xml into the "subscription" dataset via
# dealsubscriptions.
######################################################################

sub import_subscriptions
{
    my( $session ) = @_;

    my $ds = $session->get_archive()->get_dataset( "subscription" );

    my $filename = $xmlbase."/subscriptions.xml";
    print "Importing from $filename...\n";

    my $info = {};
    EPrints::ImportXML::import_file(
        $session,
        $filename,
        \&dealsubscriptions,
        $ds,
        $info );

    print "done subscriptions.\n";
}

######################################################################
# dealsubscriptions( $session, $dataset, $item, $info )
#
# Callback passed to import_file by import_subscriptions. Fills in the
# owner, rewrites a v1 search spec into the v2 serialised form, takes
# the next "subscriptionid" counter value as the subid and adds the
# record to $dataset.
######################################################################

sub dealsubscriptions
{
    my( $session, $dataset, $item, $info ) = @_;

    my $data = $item->get_data;

    _assign_owner( $data );

    if( EPrints::Utils::is_set( $data->{spec} ) )
    {
        $data->{spec} = _convert_v1_spec( $data->{spec} );
    }

    $data->{subid} = $session->get_db()->counter_next( "subscriptionid" );
    print "$data->{subid}\n";

    # Add the subscription to the database
    $session->get_db()->add_record( $dataset, $data );
}

######################################################################
# _convert_v1_spec( $spec )
#
# Returns the v2 serialised search built from a v1 spec string.
#
# The leading v2 parts, in order (from the serialiser notes kept with
# the original code):
#       push @parts, $self->{allow_blank}?1:0;
#       push @parts, $self->{satisfy_all}?1:0;
#       push @parts, $self->{order};
#       push @parts, $self->{dataset}->id();
#
# Example of the v2 form:
#   0|1|bytitle|archive|-|ispublished:ANY:EQ:unpub inpress pub
# Example of the v1 form:
#   [ALL][order0][subjects][comp-sci-speech:brain-img:neuro-ling:psy-ling:ANY][refereed][TRUE][ispublished][]
######################################################################

sub _convert_v1_spec
{
    my( $spec ) = @_;

    # v1 order code => v2 order id.
    my %order_for = (
        order0 => "byname",
        order1 => "bytitle",
        order2 => "byyear",
        order3 => "byyearoldest" );

    # Drop the outermost "[" and "]", then split on the "][" between
    # the bracketed parts.
    my @bits = split( /\]\[/, substr( $spec, 1, length( $spec )-2 ) );

    my $satisfy = shift @bits;
    my $order = shift @bits;

    my @parts = (
        '1',
        ( $satisfy eq "ALL" ) ? "1" : "0",
        $order_for{$order},
        "archive",
        "-" );

    # What remains is a run of field name / value pairs.
    while( scalar @bits )
    {
        my $field = shift @bits;
        my $value = shift @bits;
        next unless EPrints::Utils::is_set( $value );

        my @terms = split /:/, $value;

        # For "subjects" the last term is the match mode, not a value.
        my $match = "ANY";
        if( $field eq "subjects" )
        {
            $match = pop @terms;
        }

        push @parts, "$field:$match:EQ:".join( " ", @terms );
    }

    return join( "|", @parts );
}


###################################

######################################################################
# import_documents( $session )
#
# Imports documents.xml into the "document" dataset via dealdocuments.
######################################################################

sub import_documents
{
    my( $session ) = @_;

    my $ds = $session->get_archive()->get_dataset( "document" );

    my $filename = $xmlbase."/documents.xml";
    print "Importing from $filename...\n";

    my $info = {};
    EPrints::ImportXML::import_file(
        $session,
        $filename,
        \&dealdocuments,
        $ds,
        $info );

    print "done documents.\n";
}

######################################################################
# dealdocuments( $session, $dataset, $item, $info )
#
# Callback passed to import_file by import_documents. Normalises the
# eprintid and docid, lower-cases the format, adds the record to
# $dataset, copies the files from the old document directory under
# $datapath into the new document's directory and creates the secure
# area symlink.
#
# A document is skipped with a warning when its eprintid or docid
# cannot be parsed, or when no eprint object is returned for it.
######################################################################

sub dealdocuments
{
    my( $session, $dataset, $item, $info ) = @_;

    my $data = $item->get_data;

    # List-context captures, so a failed match cannot pick up stale
    # values from an earlier one.
    my( $eprint_number ) = defined $data->{eprintid}
        ? ( $data->{eprintid} =~ m/(\d+)$/ )
        : ();
    unless( defined $eprint_number )
    {
        warn "Skipping document: cannot parse its eprintid\n";
        return;
    }
    $data->{eprintid} = $eprint_number+0;

    # The unmodified v1 docid is the name of the old directory.
    my $olddocid = $data->{docid};
    my( $doc_number, $doc_pos ) = defined $olddocid
        ? ( $olddocid =~ m/(\d+)-(\d\d)$/ )
        : ();
    unless( defined $doc_number )
    {
        warn "Skipping document of eprint $data->{eprintid}: cannot parse its docid\n";
        return;
    }
    $data->{docid} = ($doc_number+0)."-".$doc_pos;

    $data->{format} = "\L$data->{format}";

    my $eprint = EPrints::EPrint->new( $session, $data->{eprintid} );
    unless( defined $eprint )
    {
        warn "Skipping document $data->{docid}: eprint $data->{eprintid} not found\n";
        return;
    }

    my $oldpath = $datapath.'/'.$eprint->get_value( "dir" ).'/'.$olddocid;

    $session->get_db()->add_record( $dataset, $data );
    my $doc = EPrints::Document->new( $session, $data->{docid} );
    EPrints::Document::_create_directory( $data->{docid}, $eprint );

    # NOTE(review): the paths are interpolated into a shell command
    # unquoted; that relies on the dump containing no shell
    # metacharacters or spaces in directory names.
    my $cmd = 'cp -R '.$oldpath.'/* '.$doc->local_path.'/';
    print "$cmd\n";
    if( system( $cmd ) != 0 )
    {
        warn "Copy failed (status $?): $cmd\n";
    }

    # Make secure area symlink
    my $linkdir = EPrints::Document::_secure_symlink_path( $eprint );
    $doc->create_symlink( $eprint, $linkdir );
}