Skip to content
Snippets Groups Projects
Commit e27dabf9 authored by Ian Longden's avatar Ian Longden
Browse files

Parser to add direct mapping fro swissprot entrys

parent 8195a408
No related branches found
No related tags found
No related merge requests found
package XrefParser::UniProtDirectParser;
use strict;
use DBI;
use base qw( XrefParser::BaseParser );
# Parse file of Uniprot records and assign direct xrefs
# All assumed to be linked to translation
my $verbose;
# --------------------------------------------------------------------------------
# Parse command line and run if being run directly
if (!defined(caller())) {
if (scalar(@ARGV) != 1) {
print "\nUsage: RefSeqParser.pm file.SPC <source_id> <species_id>\n\n";
exit(1);
}
run($ARGV[0], -1);
}
# --------------------------------------------------------------------------------
sub run {
my $self = shift if (defined(caller(1)));
my $source_id = shift;
my $species_id = shift;
my $files = shift;
my $rel_file = shift;
$verbose = shift;
my %prefix = (9606 => "ENSP0", 10090 => "ENSMUSP0", 10116 => "ENSRNOP0");
if(!defined($prefix{$species_id})){
print "No prefix known for this species $species_id???\n";
return 1;
}
my $filename = @{$files}[0];
my $file_io = $self->get_filehandle($filename);
if ( !defined($file_io) ) {
return 1;
}
my $parsed_count = 0;
my %prot2ensembl;
my $count = 0;
while ( defined( my $line = $file_io->getline() ) ) {
my ($prot, $ens) = split /\s+/,$line;
if($ens =~ /$prefix{$species_id}/){
push @{$prot2ensembl{$prot}}, $ens;
}
}
my $dbi = XrefParser::BaseParser->dbi();
my $sw_source_id = XrefParser::BaseParser->get_source_id_for_source_name("uniprot/swissprot","sequence_mapped");
if($sw_source_id < 1){
die "Could not find source id for uniprot/swissprot ???\n";
}
else{
print "Source_id = $sw_source_id\n";
}
my $get_desc_sth = $dbi->prepare("select xref_id, version, label, description from xref where source_id = $sw_source_id and accession = ?");
my $get_dependents_sth = $dbi->prepare("select dependent_xref_id, linkage_annotation, linkage_source_id from dependent_xref where master_xref_id = ?");
my $add_dependent_xref_sth = $dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id,linkage_annotation, linkage_source_id) VALUES (?,?,?,?)");
my $err_count;
foreach my $key (keys %prot2ensembl){
#
# get the descrptions etc for the uniprot entry
#
$get_desc_sth->execute($key);
my ($old_xref_id, $version, $label, $description);
$get_desc_sth->bind_columns(\$old_xref_id, \$version, \$label, \$description);
$get_desc_sth->fetch;
if(!defined($old_xref_id)){
print STDERR "Could not find $key in the database\n" if ($err_count <10);
$err_count++;
next;
}
$count++;
#
# get the dependents
#
my %linkage_anotation=();
my %linkage_source_id=();
my ($dependent_xref_id, $linkage_annotation, $linkage_source_id);
$get_dependents_sth->execute($old_xref_id);
$get_dependents_sth->bind_columns(\$dependent_xref_id, \$linkage_annotation, \$linkage_source_id);
while($get_dependents_sth->fetch){
$linkage_anotation{$dependent_xref_id} = $linkage_annotation;
$linkage_source_id{$dependent_xref_id} = $linkage_source_id;
}
# print $key."\t";
#
# Add the new xref
#
my $xref_id = XrefParser::BaseParser->add_xref($key, $version, $label, $description, $source_id, $species_id, "DIRECT");
foreach my $trans (@{$prot2ensembl{$key}}){
#
#add the direct xref entry
#
XrefParser::BaseParser->add_direct_xref( $xref_id, $trans, "Translation", '');
# print ":".$trans;
#
#add the dependents
#
foreach my $dep (keys %linkage_anotation){
$add_dependent_xref_sth->execute($xref_id, $dep, $linkage_anotation{$dep}, $linkage_source_id{$dep});
}
}
}
print $count." entrys added\n".$err_count." not found\n";
return 0;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment