diff --git a/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm b/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm index 41d3d2d105c6853a595e9fd1b1bafa0bd4fa22a0..7e43622c0a310e13eb370f93561028e4b22b4f03 100644 --- a/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm @@ -9,25 +9,65 @@ use base qw( XrefParser::BaseParser ); use Bio::EnsEMBL::Registry; -sub run { +sub run_script { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; - my $files = $ref_arg->{files}; + my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; - if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ + if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; } $verbose |=0; - my $file = @{$files}[0]; + my $wget = ""; + my $user = "ensro"; + my $host; + my $port = 3306; + my $dbname; + my $pass; + + if($file =~ /wget[=][>](\S+?)[,]/){ + $wget = $1; + } + if($file =~ /host[=][>](\S+?)[,]/){ + $host = $1; + } + if($file =~ /port[=][>](\S+?)[,]/){ + $port = $1; + } + if($file =~ /dbname[=][>](\S+?)[,]/){ + $dbname = $1; + } + if($file =~ /pass[=][>](\S+?)[,]/){ + $pass = $1; + } + if($file =~ /user[=][>](\S+?)[,]/){ + $user = $1; + } + #get direct RFAM xrefs from core my $registry = "Bio::EnsEMBL::Registry"; + my $dba; - $registry->load_registry_from_multiple_dbs( + #get the species name + my %id2name = $self->species_id2name; + my $species_name = $id2name{$species_id}[0]; + + if ($host) { + $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new( + '-host' => $host, + '-user' => $user, + '-pass' => $pass, + '-dbname' => $dbname, + '-species' => $species_name, + '-group' => 'core', + ); + } else { + $registry->load_registry_from_multiple_dbs( { '-host' => 'ens-staging1', '-user' => 'ensro', @@ -36,13 +76,9 @@ sub run { '-host' => 'ens-staging2', '-user' => 'ensro', }, - ); - - #get the species name - my %id2name = $self->species_id2name; - my $species_name = $id2name{$species_id}[0]; - - my $dba = $registry->get_DBAdaptor($species_name, 'core'); + ); + $dba = $registry->get_DBAdaptor($species_name, 'core'); + } my $rfam_sql = "select distinct t.stable_id, hit_name from analysis a join transcript t on (a.analysis_id = t.analysis_id and a.logic_name = 'ncRNA' and t.biotype != 'miRNA') join exon_transcript et on (t.transcript_id = et.transcript_id) join supporting_feature sf on (et.exon_id = sf.exon_id and sf.feature_type = 'dna_align_feature' ) join dna_align_feature df on (sf.feature_id = df.dna_align_feature_id) order by hit_name"; @@ -64,59 +100,52 @@ sub run { } $sth->finish; - my $file_io = $self->get_filehandle($file); - if ( !defined $file_io ) { - print STDERR "ERROR: Could not open $file\n"; - return 1; # 1 is an error + my $ua = LWP::UserAgent->new(); + $ua->timeout(10); + $ua->env_proxy(); + my $request = HTTP::Request->new(GET => $wget); + my $response = $ua->request($request); + + if ( !$response->is_success() ) { + warn($response->status_line); + return 1; } + my @lines = split(/\n\n/, $response->decoded_content); my @xrefs; + my $xref_count = 0; + my $direct_count = 0; - local $/ = "//\n"; - - - my $xref_count; - my $direct_count; - - while ($_ = $file_io->getline()) { + while (my $entry = shift @lines) { my $xref; - - my $entry = $_; chomp $entry; - next if (!$entry); - my ($accession) = $entry =~ /\n#=GF\sAC\s+(\w+)/; - my ($label) = $entry =~ /\n#=GF\sID\s+([^\n]+)/; + my ($accession) = $entry =~ /#=GF\sAC\s+(\w+)/ ; + my ($label) = $entry =~ /\n#=GF\sID\s+([^\n]+)/; my ($description) = $entry =~ /\n#=GF\sDE\s+([^\n]+)/; - - if (exists($rfam_transcript_stable_ids{$accession})){ - - #add xref - my $xref_id = $self->add_xref({ acc => $accession, - version => 0, + if ($accession) { + if (exists($rfam_transcript_stable_ids{$accession})){ + #add xref + my $xref_id = $self->add_xref({ acc => $accession, + version => 0, label => $label || $accession , desc => $description, source_id => $source_id, species_id => $species_id, info_type => "DIRECT"} ); - my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}}; - - foreach my $stable_id (@transcript_stable_ids){ - $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); - $direct_count++; - } - - $xref_count++; - + my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}}; + foreach my $stable_id (@transcript_stable_ids){ + $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $direct_count++; + } + $xref_count++; + } } - } - $file_io->close(); - print "Added $xref_count RFAM xrefs and $direct_count direct xrefs\n" if($verbose); if ( !$xref_count ) { return 1; # 1 error