my $xref_sth ;
my $dep_sth;

# wormpep.table file format (one record per line, fields separated by tabs):
#>B0025.1a CE24759 vps-34 phosphatidylinositol 3-kinase Confirmed SW:Q9TXI7 AAF23184.1
#>B0025.1b CE24760 vps-34 Confirmed SW:Q9TXI6 AAF23185.1
#>B0025.1c CE37691 vps-34 Confirmed SW:Q5TYK9 AAV34807.1

# Just need direct xref between B0025.1a (=stable ID for C. elegans) and CE24759

# run
#
#   Arg [1]    : parser object; must provide dbi(), add_xref() and
#                add_direct_xref()
#   Arg [2]    : string $file - path of the wormpep.table file to read
#   Arg [3]    : int $source_id - only reported on STDERR; the xrefs
#                themselves are stored under the 'wormpep_id' source
#   Arg [4]    : int $species_id - species the xrefs are stored against
#   Description: For every record in the file, looks up the xref whose
#                accession is the second field (e.g. CE24759), creating it
#                when no such xref exists yet, then adds a direct xref from
#                it to the transcript named in the first field
#                (e.g. B0025.1a). Prints a summary of what was added.
#   Exceptions : dies if $file cannot be opened
sub run {

  my ($self, $file, $source_id, $species_id) = @_;

  print STDERR "WORMPep source = $source_id\tspecies = $species_id\n";

  my $worm_source_id = XrefParser::BaseParser->get_source_id_for_source_name('wormpep_id');

  # All three values are bound at execute time instead of being interpolated
  # into the SQL text. The handle gets its own name so that it no longer
  # shadows the file-scoped $xref_sth.
  my $lookup_sth = $self->dbi()->prepare(
    "SELECT xref_id FROM xref WHERE accession=? AND source_id=? AND species_id=?");

  open(my $pep_fh, '<', $file) || die "Could not open $file: $!\n";

  # Start the counters at zero so the summary is well-formed (and free of
  # "uninitialized value" warnings) even when nothing is added.
  my $x_count = 0;
  my $d_count = 0;

  while (my $line = <$pep_fh>) {

    chomp $line;
    $line =~ s/^>//;    # drop the leading record marker

    # Only the first two fields are needed; the limit stops split from
    # breaking up the rest of the record.
    my ($transcript, $wb) = split(/\t/, $line, 3);

    # Blank or truncated lines carry no usable mapping.
    next unless defined $transcript && length $transcript;
    next unless defined $wb && length $wb;

    # reuse or create xref
    $lookup_sth->execute($wb, $worm_source_id, $species_id);
    my ($xref_id) = $lookup_sth->fetchrow_array();
    if (!$xref_id) {
      $xref_id = $self->add_xref($wb, undef, $wb, "", $worm_source_id, $species_id);
      $x_count++;
    }

    # and direct xref
    $self->add_direct_xref($xref_id, $transcript, "transcript", "");

    $d_count++;
  }

  close($pep_fh);

  print "Added $d_count direct xrefs and $x_count xrefs\n";

}

sub new {

  my $self = {};