Skip to content
Snippets Groups Projects
Commit cbeca63a authored by Ian Longden
Browse files

check the species is correct. This will make the parsing a little slower for...

Check that the species is correct. This makes parsing a little slower for species-specific files, but the same modules can now be used to parse a file containing a mixture of species.
parent c6fa236a
No related branches found
No related tags found
No related merge requests found
......@@ -47,31 +47,35 @@ sub run {
open(GO,"<".$file) || die "Could not open $file\n";
my $taxon_line = "taxon:".$species_id;
while (<GO>) {
chomp;
my @array = split (/\t/,$_);
$array[9] =~ s/\'/\\\'/g;
my $master=0;
if($array[0] =~ /ENSEMBL/){
#these might be good for a check
# match GO to Uniprot
# match Uniprot to ENSEMBL
# check ENSEMBL's are the same.
}
elsif($array[0] =~ /RefSeq/){
if($refseq{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($refseq{$array[1]},$array[4],'',$array[4],'',$array[6],$source_id,$species_id);
$count++;
if(/$taxon_line/){
chomp;
my @array = split (/\t/,$_);
$array[9] =~ s/\'/\\\'/g;
my $master=0;
if($array[0] =~ /ENSEMBL/){
#these might be good for a check
# match GO to Uniprot
# match Uniprot to ENSEMBL
# check ENSEMBL's are the same.
}
}
elsif($array[0] =~ /UniProt/){
if($swiss{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($swiss{$array[1]},$array[4],'',$array[4],'',$array[6],$source_id,$species_id);
$count++;
elsif($array[0] =~ /RefSeq/){
if($refseq{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($refseq{$array[1]},$array[4],'',$array[4],'',$array[6],$source_id,$species_id);
$count++;
}
}
elsif($array[0] =~ /UniProt/){
if($swiss{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($swiss{$array[1]},$array[4],'',$array[4],'',$array[6],$source_id,$species_id);
$count++;
}
}
else{
print STDERR "unknown type ".$array[0]."\n";
}
}
else{
print STDERR "unknown type ".$array[0]."\n";
}
}
print "\t$count GO dependent xrefs added\n";
......
......@@ -17,7 +17,7 @@ use vars qw(@ISA);
if (!defined(caller())) {
if (scalar(@ARGV) != 1) {
print "\nUsage: RefSeqGPFFParser.pm file.SPC\n\n";
print "\nUsage: RefSeqGPFFParser.pm file.SPC <source_id>\n\n";
exit(1);
}
......@@ -32,13 +32,16 @@ sub run {
my $self = shift if (defined(caller(1)));
my $file = shift;
my $source_id = shift;
my $species_id = shift;
if ($source_id < 1) {
$source_id = XrefParser::BaseParser->get_source_id_for_filename(basename($file));
print "Source id for $file: $source_id\n";
}
if(!defined($species_id)){
$species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
}
XrefParser::BaseParser->upload_xrefs(create_xrefs($source_id, $file));
XrefParser::BaseParser->upload_xrefs(create_xrefs($source_id, $file, $species_id));
}
......@@ -51,7 +54,7 @@ sub run {
sub create_xrefs {
my ($source_id, $file) = @_;
my ($source_id, $file, $species_id) = @_;
my %name2species_id = XrefParser::BaseParser->name2species_id();
......@@ -76,10 +79,10 @@ sub create_xrefs {
$species =~ s/^\s*//g;
$species =~ s/\s+/_/g;
$species =~ s/\n//g;
my $species_id = $name2species_id{$species};
my $species_id_check = $name2species_id{$species};
# skip xrefs for species that aren't in the species table
if (defined $species_id) {
# Keep the entry only when its species is known (present in the species
# table) AND is the species this file is being parsed for.
# The comparison must be '==': a single '=' assigns $species_id_check to
# $species_id, so the test would pass for any known species, clobber the
# requested species id, and leave it undef (rejecting every later entry)
# after the first unknown species.
# NOTE(review): assumes species ids are numeric (taxon ids) - use 'eq' if not.
if (defined($species_id) and defined($species_id_check) and $species_id == $species_id_check) {
my ($acc) = $entry =~ /ACCESSION\s+(\S+)/;
my ($ver) = $entry =~ /VERSION\s+(\S+)/;
......@@ -163,7 +166,10 @@ sub create_xrefs {
}
push @xrefs, $xref;
} # if defined species
}# if defined species
else{ #### REMOVE after TESTING
print "not correct $species $species_id NE $species_id_check\n";
}
} # while <REFSEQ>
......
......@@ -17,7 +17,7 @@ use vars qw(@ISA);
if (!defined(caller())) {
if (scalar(@ARGV) != 1) {
print "\nUsage: RefSeqParser.pm file.SPC\n\n";
print "\nUsage: RefSeqParser.pm file.SPC <source_id> <species_id>\n\n";
exit(1);
}
......@@ -32,13 +32,16 @@ sub run {
my $self = shift if (defined(caller(1)));
my $file = shift;
my $source_id = shift;
my $species_id = shift;
if ($source_id < 1) {
if (!defined($source_id) or $source_id < 1) {
$source_id = XrefParser::BaseParser->get_source_id_for_filename(basename($file));
print "Source id for $file: $source_id\n";
}
if(!defined($species_id)){
$species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
}
XrefParser::BaseParser->upload_xrefs(create_xrefs($source_id, $file));
XrefParser::BaseParser->upload_xrefs(create_xrefs($source_id, $file, $species_id));
}
......@@ -51,7 +54,7 @@ sub run {
sub create_xrefs {
my ($source_id, $file) = @_;
my ($source_id, $file, $species_id) = @_;
my %name2species_id = XrefParser::BaseParser->name2species_id();
......@@ -92,10 +95,10 @@ sub create_xrefs {
$species = lc $species;
$species =~ s/ /_/;
my $species_id = $name2species_id{$species};
my $species_id_check = $name2species_id{$species};
# skip xrefs for species that aren't in the species table
if (defined $species_id) {
# Keep the entry only when its species is known (present in the species
# table) AND is the species this file is being parsed for.
# The comparison must be '==': a single '=' assigns $species_id_check to
# $species_id, so the test would pass for any known species, clobber the
# requested species id, and leave it undef (rejecting every later entry)
# after the first unknown species.
# NOTE(review): assumes species ids are numeric (taxon ids) - use 'eq' if not.
if (defined($species_id) and defined($species_id_check) and $species_id == $species_id_check) {
my ($acc_no_ver,$ver) = split (/\./,$acc);
$xref->{ACCESSION} = $acc_no_ver;
......@@ -111,6 +114,9 @@ sub create_xrefs {
push @xrefs, $xref;
}
else{
print "not correct species $species_id NE $species_id_check\n";
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment