Skip to content
Snippets Groups Projects
Commit 96d7e747 authored by Magali Ruffier's avatar Magali Ruffier
Browse files

Module modified to accept database connection arguments

The default behaviour is still to use the registry and the staging servers, but an alternative database on another server can now be used instead.
parent 5c9d3aa0
No related branches found
No related tags found
No related merge requests found
...@@ -9,25 +9,65 @@ use base qw( XrefParser::BaseParser ); ...@@ -9,25 +9,65 @@ use base qw( XrefParser::BaseParser );
use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::Registry;
sub run { sub run_script {
my ($self, $ref_arg) = @_; my ($self, $ref_arg) = @_;
my $source_id = $ref_arg->{source_id}; my $source_id = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id}; my $species_id = $ref_arg->{species_id};
my $files = $ref_arg->{files}; my $file = $ref_arg->{file};
my $verbose = $ref_arg->{verbose}; my $verbose = $ref_arg->{verbose};
if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
croak "Need to pass source_id, species_id and file as pairs"; croak "Need to pass source_id, species_id and file as pairs";
} }
$verbose |=0; $verbose |=0;
my $file = @{$files}[0]; my $wget = "";
my $user = "ensro";
my $host;
my $port = 3306;
my $dbname;
my $pass;
if($file =~ /wget[=][>](\S+?)[,]/){
$wget = $1;
}
if($file =~ /host[=][>](\S+?)[,]/){
$host = $1;
}
if($file =~ /port[=][>](\S+?)[,]/){
$port = $1;
}
if($file =~ /dbname[=][>](\S+?)[,]/){
$dbname = $1;
}
if($file =~ /pass[=][>](\S+?)[,]/){
$pass = $1;
}
if($file =~ /user[=][>](\S+?)[,]/){
$user = $1;
}
#get direct RFAM xrefs from core #get direct RFAM xrefs from core
my $registry = "Bio::EnsEMBL::Registry"; my $registry = "Bio::EnsEMBL::Registry";
my $dba;
$registry->load_registry_from_multiple_dbs( #get the species name
my %id2name = $self->species_id2name;
my $species_name = $id2name{$species_id}[0];
if ($host) {
$dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
'-host' => $host,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $dbname,
'-species' => $species_name,
'-group' => 'core',
);
} else {
$registry->load_registry_from_multiple_dbs(
{ {
'-host' => 'ens-staging1', '-host' => 'ens-staging1',
'-user' => 'ensro', '-user' => 'ensro',
...@@ -36,13 +76,9 @@ sub run { ...@@ -36,13 +76,9 @@ sub run {
'-host' => 'ens-staging2', '-host' => 'ens-staging2',
'-user' => 'ensro', '-user' => 'ensro',
}, },
); );
$dba = $registry->get_DBAdaptor($species_name, 'core');
#get the species name }
my %id2name = $self->species_id2name;
my $species_name = $id2name{$species_id}[0];
my $dba = $registry->get_DBAdaptor($species_name, 'core');
my $rfam_sql = "select distinct t.stable_id, hit_name from analysis a join transcript t on (a.analysis_id = t.analysis_id and a.logic_name = 'ncRNA' and t.biotype != 'miRNA') join exon_transcript et on (t.transcript_id = et.transcript_id) join supporting_feature sf on (et.exon_id = sf.exon_id and sf.feature_type = 'dna_align_feature' ) join dna_align_feature df on (sf.feature_id = df.dna_align_feature_id) order by hit_name"; my $rfam_sql = "select distinct t.stable_id, hit_name from analysis a join transcript t on (a.analysis_id = t.analysis_id and a.logic_name = 'ncRNA' and t.biotype != 'miRNA') join exon_transcript et on (t.transcript_id = et.transcript_id) join supporting_feature sf on (et.exon_id = sf.exon_id and sf.feature_type = 'dna_align_feature' ) join dna_align_feature df on (sf.feature_id = df.dna_align_feature_id) order by hit_name";
...@@ -64,59 +100,52 @@ sub run { ...@@ -64,59 +100,52 @@ sub run {
} }
$sth->finish; $sth->finish;
my $file_io = $self->get_filehandle($file); my $ua = LWP::UserAgent->new();
if ( !defined $file_io ) { $ua->timeout(10);
print STDERR "ERROR: Could not open $file\n"; $ua->env_proxy();
return 1; # 1 is an error my $request = HTTP::Request->new(GET => $wget);
my $response = $ua->request($request);
if ( !$response->is_success() ) {
warn($response->status_line);
return 1;
} }
my @lines = split(/\n\n/, $response->decoded_content);
my @xrefs; my @xrefs;
my $xref_count = 0;
my $direct_count = 0;
local $/ = "//\n"; while (my $entry = shift @lines) {
my $xref_count;
my $direct_count;
while ($_ = $file_io->getline()) {
my $xref; my $xref;
my $entry = $_;
chomp $entry; chomp $entry;
next if (!$entry); next if (!$entry);
my ($accession) = $entry =~ /\n#=GF\sAC\s+(\w+)/; my ($accession) = $entry =~ /#=GF\sAC\s+(\w+)/ ;
my ($label) = $entry =~ /\n#=GF\sID\s+([^\n]+)/; my ($label) = $entry =~ /\n#=GF\sID\s+([^\n]+)/;
my ($description) = $entry =~ /\n#=GF\sDE\s+([^\n]+)/; my ($description) = $entry =~ /\n#=GF\sDE\s+([^\n]+)/;
if ($accession) {
if (exists($rfam_transcript_stable_ids{$accession})){ if (exists($rfam_transcript_stable_ids{$accession})){
#add xref
#add xref my $xref_id = $self->add_xref({ acc => $accession,
my $xref_id = $self->add_xref({ acc => $accession, version => 0,
version => 0,
label => $label || $accession , label => $label || $accession ,
desc => $description, desc => $description,
source_id => $source_id, source_id => $source_id,
species_id => $species_id, species_id => $species_id,
info_type => "DIRECT"} ); info_type => "DIRECT"} );
my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}}; my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}};
foreach my $stable_id (@transcript_stable_ids){
foreach my $stable_id (@transcript_stable_ids){ $self->add_direct_xref($xref_id, $stable_id, "Transcript", "");
$self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); $direct_count++;
$direct_count++; }
} $xref_count++;
}
$xref_count++;
} }
} }
$file_io->close();
print "Added $xref_count RFAM xrefs and $direct_count direct xrefs\n" if($verbose); print "Added $xref_count RFAM xrefs and $direct_count direct xrefs\n" if($verbose);
if ( !$xref_count ) { if ( !$xref_count ) {
return 1; # 1 error return 1; # 1 error
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment