Skip to content
Snippets Groups Projects
Commit e327eeb1 authored by Ian Longden
Browse files

Anopheles multi tax_id fix

parent 4160f046
No related branches found
No related tags found
No related merge requests found
......@@ -101,7 +101,7 @@ sub run
}
my $sql =
"SELECT s.source_id, su.source_url_id, s.name, su.url, su.checksum, su.parser, su.species_id, sp.name " .
"SELECT distinct(s.source_id), su.source_url_id, s.name, su.url, su.checksum, su.parser, su.species_id " .
"FROM source s, source_url su, species sp " .
"WHERE s.download='Y' AND su.source_id=s.source_id " .
"AND su.species_id=sp.species_id " .
......@@ -113,10 +113,10 @@ sub run
$sth->execute();
my ( $source_id, $source_url_id, $name, $url, $checksum, $parser,
$species_id, $species_name );
$species_id);
$sth->bind_columns( \$source_id, \$source_url_id, \$name, \$url,
\$checksum, \$parser, \$species_id, \$species_name );
\$checksum, \$parser, \$species_id);
my $last_type = "";
my $dir;
......@@ -154,7 +154,7 @@ sub run
print "Parsing $dsn with $parser\n";
eval "require XrefParser::$parser";
my $new = "XrefParser::$parser"->new();
if($new->run($dsn, $source_id, $species_id, $name, $species_name)){
if($new->run($dsn, $source_id, $species_id, $name, undef)){
$summary{$parser}++;
}
next;
......@@ -1241,16 +1241,18 @@ sub show_valid_species() {
# Look up the taxonomy ID(s) stored in the species table for one species_id.
#
# Arguments:
#   $self       - invocant (not used by the lines shown here)
#   $species_id - value substituted into the WHERE clause of the query
#
# The query is prepared on the handle returned by dbi(); if execute() returns
# false the sub croaks with that handle's errstr.
#
# NOTE(review): this span is a rendered diff whose +/- markers were lost, so
# removed and added lines sit side by side and the braces do not balance as
# written. The "if(my @row ...)" / "return $row[0];" pair and "return undef;"
# look like the removed version (return the first taxonomy_id, or undef when
# no row matches). "my %hash;", the "while" loop and "return \%hash;" look
# like the added version (return a reference to a hash whose keys are every
# taxonomy_id found, each mapped to 1). TODO confirm against the real commit.
sub get_taxonomy_from_species_id{
my ($self,$species_id) = @_;
my %hash;
my $dbi = dbi();
# NOTE(review): $species_id is interpolated directly into the SQL text; a
# bound placeholder would be safer if the value can come from outside.
my $sth = $dbi->prepare("SELECT taxonomy_id FROM species WHERE species_id = $species_id");
$sth->execute() or croak( $dbi->errstr() );
if(my @row = $sth->fetchrow_array()) {
return $row[0];
# Collect every taxonomy_id returned, so a species with several IDs keeps all.
while(my @row = $sth->fetchrow_array()) {
$hash{$row[0]} = 1;
}
$sth->finish;
return undef;
return \%hash;
}
sub get_direct_xref{
my ($self,$stable_id,$type,$link) = @_;
......
......@@ -34,7 +34,7 @@ sub run {
$species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
}
my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id)};
my $eg_io = $self->get_filehandle($file);
......@@ -92,7 +92,7 @@ sub run {
while ( $_ = $eg_io->getline() ) {
chomp;
my (@arr) = split(/\t/,$_);
if($arr[$gene_tax_id_index] != $species_tax_id){
if(!defined($species_tax_id{$arr[$gene_tax_id_index]})){
next;
}
my $acc = $arr[$gene_id_index];
......
......@@ -17,7 +17,7 @@ sub run {
my ($self, $source_id, $species_id, $file) = @_;
my $sio = Bio::SeqIO->new(-format=>'fasta' , -file=>$file );
my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id)};
my @xrefs;
while( my $seq = $sio->next_seq ) {
......@@ -25,7 +25,7 @@ sub run {
# Test species if available
if( my $sp = $seq->species ){
if( my $tax_id = $sp->ncbi_taxid ){
next if $tax_id != $species_tax_id;
next if (!defined $species_tax_id{$tax_id});
}
}
......
......@@ -26,7 +26,7 @@ sub run {
return 1; # 1 = error
}
my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id)};
while ( $_ = $ipi_io->getline() ) {
my $xref;
......@@ -43,7 +43,7 @@ sub run {
# note currently we ignore all the other cross-references in the IPI file
# only interested in species with the taxonomy ID we're looking for
next if ( !defined $tax_id || $tax_id != $species_tax_id );
next if ( !defined $tax_id || !defined $species_tax_id{$tax_id});
# make sequence into one long string
$sequence =~ s/\n//g;
......
......@@ -31,7 +31,6 @@ sub run {
return 1; # 1 error
}
my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
my %swiss = %{ $self->get_valid_codes( "uniprot", $species_id ) };
my $missed = 0;
......
......@@ -28,8 +28,6 @@ sub run {
return 1; # 1 error
}
my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
while ( $_ = $file_io->getline() ) {
my $xref;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment