#!/usr/bin/perl -I/opt/eprints/perl_lib
######################################################################
#
#  Dump an EPrints 1 archive.
#
#  Writes one XML file per table ("<table>.xml") into the directory
#  "eprintsdump" under $EPrintSite::SiteInfo::local_root.
#
#  This script is not an official part of the EPrints release but
#  is still distributed under the terms of the GNU Public License.
#
######################################################################

#use EPrints::Database;
#use EPrints::Deletion;
#use EPrints::EPrint;
#use EPrints::HTMLRender;
use EPrints::Session;
#use EPrints::Subject;
use EPrints::SearchExpression;
use EPrints::MetaInfo;
use XML::Writer;
use IO::File;

use strict;
use warnings;

# Tables to dump, in the order they are processed.
my @TABLES = qw(
    subscriptions subjects users documents
    deletions archive buffer inbox
);

# Maps each table name to a callback returning its list of field
# descriptors.  The three eprint tables share one field list.
my %FIELDS_GETTER_FOR = (
    subscriptions => sub { EPrints::MetaInfo::get_subscription_fields() },
    subjects      => sub { EPrints::MetaInfo::get_subject_fields() },
    users         => sub { EPrints::MetaInfo::get_user_fields() },
    documents     => sub { EPrints::MetaInfo::get_document_fields() },
    deletions     => sub { EPrints::MetaInfo::get_deletion_fields() },
    archive       => sub { EPrints::MetaInfo::get_all_eprint_fields() },
    buffer        => sub { EPrints::MetaInfo::get_all_eprint_fields() },
    inbox         => sub { EPrints::MetaInfo::get_all_eprint_fields() },
);

# Field types whose stored value is a colon-delimited list (":a:b:c:").
my %IS_COLON_LIST_TYPE = map { $_ => 1 } qw( name set subjects );

# New session
my $session = EPrints::Session->new( 1 );

my $dir = $EPrintSite::SiteInfo::local_root . "/eprintsdump";
unless ( -d $dir ) {
    # Built-in mkdir: no shell involved, and a failure is reported
    # instead of surfacing later as an unusable output handle.
    mkdir $dir or die "Cannot create directory $dir: $!";
}
print $dir . "\n";

foreach my $table (@TABLES) {
    print "Dumping $table\n";
    dump_table( $session, $dir, $table );
}

$session->terminate();

# Dump every row of one table to "$dir/$table.xml".
#
#   $session - the EPrints::Session used for the search
#   $dir     - existing directory that receives the output file
#   $table   - table name; must be a key of %FIELDS_GETTER_FOR
#
# Dies on an unknown table or if the output file cannot be opened
# or closed cleanly.
sub dump_table {
    my ( $session, $dir, $table ) = @_;

    my $getter = $FIELDS_GETTER_FOR{$table}
        or die "unknown table";
    my @fields = $getter->();
    my @cols   = map { $_->{name} } @fields;

    my $path   = "$dir/$table.xml";
    my $output = IO::File->new( $path, "w" )
        or die "Cannot open $path for writing: $!";

    # NOTE(review): the original statement prints an empty string plus a
    # newline here.  An XML declaration may have been intended -- confirm
    # (and confirm the data encoding) before adding one.
    print {$output} "\n";

    my $writer = XML::Writer->new(
        OUTPUT      => $output,
        DATA_MODE   => 1,
        DATA_INDENT => 2,
    );

    my $searchexp = EPrints::SearchExpression->new(
        $session, $table, 1, 1, [], undef, undef );

    $writer->startTag( "eprintsdata", "name" => $table );

    my $results = $searchexp->do_raw_search( \@cols );
    foreach my $row ( @{$results} ) {
        $writer->startTag("record");

        # Columns come back in the order requested in @cols, i.e. the
        # order of @fields; index instead of shifting so that the
        # result rows are left untouched.
        foreach my $i ( 0 .. $#fields ) {
            write_field( $writer, $fields[$i], $row->[$i] );
        }

        $writer->endTag("record");
    }

    $writer->endTag("eprintsdata");
    $writer->end();

    # Buffered write errors only surface at close time.
    $output->close()
        or die "Cannot close $path: $!";

    return;
}

# Emit the XML for one column of one record.
#
#   $writer - XML::Writer positioned inside a <record> element
#   $field  - field descriptor; its {name} and {type} entries are read
#   $raw    - raw column value (may be undef)
#
# Multi-valued types produce one <field> element per value.  A "name"
# value is "family,given" and is written as two <part> children.
sub write_field {
    my ( $writer, $field, $raw ) = @_;

    my $name = $field->{name};
    my $type = defined $field->{type} ? $field->{type} : "";

    foreach my $value ( split_values( $type, $raw ) ) {
        if ( $type eq "name" ) {
            my ( $family, $given ) = split /,/, $value;
            $writer->startTag( "field", "name" => $name );
            $writer->dataElement( "part", cleanup($family), "name" => "family" );
            $writer->dataElement( "part", cleanup($given),  "name" => "given" );
            $writer->endTag("field");
        }
        else {
            $writer->dataElement( "field", cleanup($value), "name" => $name );
        }
    }

    return;
}

# Split a raw column value into its individual values by field type.
#
#   "multiurl"               - whitespace separated
#   "name"/"set"/"subjects"  - colon separated, with one optional
#                              leading and trailing colon removed
#   anything else            - a single value, returned as-is
#
# An undefined value of a multi-valued type yields an empty list; an
# undefined single value is returned as (undef) so that an empty
# <field> element is still written for it.
sub split_values {
    my ( $type, $raw ) = @_;

    if ( $type eq "multiurl" ) {
        return () unless defined $raw;
        return split /\s+/, $raw;
    }

    if ( $IS_COLON_LIST_TYPE{$type} ) {
        return () unless defined $raw;
        my $list = $raw;
        $list =~ s/^://;
        $list =~ s/:$//;
        return split /:/, $list;
    }

    return ($raw);
}

# Return $text with every character below 0x20 removed, except
# LF (0x0a) and CR (0x0d).  An undefined $text becomes the empty
# string so the XML writer never receives undef.
sub cleanup {
    my ($text) = @_;

    return "" unless defined $text;

    $text =~ tr/\x00-\x09\x0b\x0c\x0e-\x1f//d;
    return $text;
}