Skip to content
Snippets Groups Projects
Commit d9d39b5e authored by Glenn Proctor
Browse files

Simplified since we're now using direct xrefs for elegans rather than mapping.

parent ff515ef8
No related branches found
No related tags found
No related merge requests found
...@@ -10,109 +10,52 @@ use vars qw(@ISA); ...@@ -10,109 +10,52 @@ use vars qw(@ISA);
my $xref_sth ; my $xref_sth ;
my $dep_sth; my $dep_sth;
# -------------------------------------------------------------------------------- # wormpep.table file format:
# Parse command line and run if being run directly #>B0025.1a CE24759 vps-34 phosphatidylinositol 3-kinase Confirmed SW:Q9TXI7 AAF23184.1
#>B0025.1b CE24760 vps-34 Confirmed SW:Q9TXI6 AAF23185.1
if (!defined(caller())) { #>B0025.1c CE37691 vps-34 Confirmed SW:Q5TYK9 AAV34807.1
if (scalar(@ARGV) != 1) {
print "\nUsage: WormPepParser.pm file <source_id> <species_id>\n\n";
exit(1);
}
run(@ARGV); # Just need direct xref between B0025.1a (=stable ID for C. Elegans) and CE24759
}
sub run { sub run {
my $self = shift if (defined(caller(1))); my ($self, $file, $source_id, $species_id) = @_;
my $file = shift;
my $source_id = shift;
my $species_id = shift;
print STDERR "WORMPep source = $source_id\tspecies = $species_id\n"; print STDERR "WORMPep source = $source_id\tspecies = $species_id\n";
if(!defined($source_id)){
$source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
}
if(!defined($species_id)){
$species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
}
my $worm_source_id = XrefParser::BaseParser->get_source_id_for_source_name('wormbase_transcript');
my (%worm) = %{XrefParser::BaseParser->get_valid_codes("wormbase_transcript",$species_id)}; my $worm_source_id = XrefParser::BaseParser->get_source_id_for_source_name('wormpep_id');
my (%swiss) = %{XrefParser::BaseParser->get_valid_codes("Uniprot",$species_id)}; my $xref_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$worm_source_id AND species_id=$species_id");
my $sql = "update xref set accession =? where xref_id=?"; open(PEP,"<".$file) || die "Could not open $file\n";
my $dbi = XrefParser::BaseParser->dbi();
my $sth = $dbi->prepare($sql);
my ($x_count, $d_count);
my $sql2 = "select x2.accession, x2.xref_id "; while (<PEP>) {
$sql2 .= "from dependent_xref d, xref x1, xref x2 ";
$sql2 .= "where d.master_xref_id = x1.xref_id and ";
$sql2 .= " d.dependent_xref_id = x2.xref_id and ";
$sql2 .= " x2.source_id = $worm_source_id and ";
$sql2 .= " x1.xref_id = ? and ";
$sql2 .= " x2.accession = ?";
my $sth2 = $dbi->prepare($sql2);
my ($transcript, $wb) = (split(/\t/,substr($_,1)))[0,1];
my $sql3 = 'delete from dependent_xref where dependent_xref.master_xref_id=? and dependent_xref.dependent_xref_id=?'; # reuse or create xref
my $sth3 = $dbi->prepare($sql3); my $xref_id;
$xref_sth->execute($wb);
my $xref_id = ($xref_sth->fetchrow_array())[0];
if (!$xref_id) {
$xref_id = $self->add_xref($wb, undef, $wb, "", $worm_source_id, $species_id);
$x_count++;
}
open(PEP,"<".$file) || die "Could not open $file\n"; # and direct xref
$self->add_direct_xref($xref_id, $transcript, "transcript", "");
while (<PEP>) { $d_count++;
my ($transcript, $wb, $swiss_ref) = (split(/\t/,substr($_,1)))[0,1,5];
my $swiss_xref;
if($swiss_ref =~ /SW:(.*)/){
$swiss_ref = $1;
if(defined($swiss{$swiss_ref})){
$swiss_xref = $swiss{$swiss_ref};
my $diff =0;
my $gene;
if($transcript =~ /(\S+\.\d+)/){
$gene = $1;
if($gene ne $transcript){
$diff=1;
}
}
else{
die "Gene format not recognised $transcript\n";
}
$sth2->execute($swiss_xref, $gene) || die $dbi->errstr;
(my $gene_acc, my $gene_xref) = $sth2->fetchrow_array();
$sth2->execute($swiss_xref, $transcript) || die $dbi->errstr;
(my $tran_acc, my $tran_xref) = $sth2->fetchrow_array();
my $create = 1;
if(defined($tran_xref)){ #okay
$create = 0;
}
elsif(defined($gene_xref)){
#need to delete dependency
#then add new one with correct name
$sth3->execute($swiss_xref, $gene_xref) || die $dbi->errstr;
print "removing $swiss_ref -> $gene : ";
}
if($create){
XrefParser::BaseParser->add_to_xrefs($swiss_xref,$transcript,'',$transcript,"","",$worm_source_id,$species_id);
print "adding $swiss_ref -> $transcript\n";
}
}
}
} }
close (PEP);
print "Added $d_count direct xrefs and $x_count xrefs\n";
} }
sub new { sub new {
my $self = {}; my $self = {};
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment