Commit 35f01da3 authored by Ian Longden's avatar Ian Longden
Browse files

Uniprot_genename source added

parent 96dfacc7
......@@ -35,6 +35,7 @@ if (!defined(caller())) {
# --------------------------------------------------------------------------------
sub run {
my $self = shift if (defined(caller(1)));
......@@ -167,11 +168,7 @@ sub create_xrefs {
my $num_sptr_pred = 0;
my %dependent_sources = $self->get_dependent_xref_sources(); # name-id hash
# get from HGNC file
# if(defined($dependent_sources{'HGNC'})){
# $dependent_sources{'HGNC'} = XrefParser::BaseParser->get_source_id_for_source_name("HGNC","uniprot");
# }
my %GeneNameSynonym;
if(defined($dependent_sources{'MGI'})){
$dependent_sources{'MGI'} = XrefParser::BaseParser->get_source_id_for_source_name("MGI","uniprot");
......@@ -293,8 +290,11 @@ sub create_xrefs {
foreach my $line (@all_lines) {
my ($accessions_only) = $line =~ /^AC\s+(.+)/;
push(@accessions, (split /;\s*/, $accessions_only)) if ($accessions_only);
}
$xref->{INFO_TYPE} = "SEQUENCE_MATCH";
$xref->{ACCESSION} = $accessions[0];
for (my $a=1; $a <= $#accessions; $a++) {
......@@ -393,6 +393,40 @@ sub create_xrefs {
$xref->{SEQUENCE} = $parsed_seq;
#print "Adding " . $xref->{ACCESSION} . " " . $xref->{LABEL} ."\n";
my ($gns) = $_ =~ /(GN\s+Name.+)/; # /s allows . to match newline
my @gn_lines = ();
if ( defined $gns ) { @gn_lines = split /\n/, $gns }
foreach my $gn (@gn_lines){
my $gene_name = undef;
my %depe;
if($gn =~ /Name=(\S+);/){
$depe{ACCESSION} = uc($1);
$gene_name = $depe{ACCESSION};
$depe{SOURCE_NAME} = "Uniprot_genename";
$depe{SOURCE_ID} = $dependent_sources{"Uniprot_genename"};
$depe{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID};
push @{$xref->{DEPENDENT_XREFS}}, \%depe;
$dependent_xrefs{"Uniprot_genename"}++;
my @syn;
if($gn =~ /Synonyms=([^;]+);/){
my $syn = $1;
$syn =~ s/\s+//g;
@syn= split(/,/,$syn);
foreach my $ent (@syn){
$GeneNameSynonym{$gene_name}{uc($ent)} = 1;
# print "$gene_name\t$ent\n";
}
}
}
}
my ($deps) = $_ =~ /(DR\s+.+)/s; # /s allows . to match newline
my @dep_lines = ();
if ( defined $deps ) { @dep_lines = split /\n/, $deps }
# dependent xrefs - only store those that are from sources listed in the source table
my ($deps) = $_ =~ /(DR\s+.+)/s; # /s allows . to match newline
......@@ -535,11 +569,31 @@ sub create_xrefs {
print "Read $num_sp SwissProt xrefs and $num_sptr SPTrEMBL xrefs from $file\n" if($verbose);
print "Found $num_sp_pred predicted SwissProt xrefs and $num_sptr_pred predicted SPTrEMBL xrefs\n" if (($num_sp_pred > 0 || $num_sptr_pred > 0) and $verbose);
# my $kount=0;
my $genename_source_id = $dependent_sources{"Uniprot_genename"};
foreach my $namekey (keys %GeneNameSynonym){
#add xref
my $xref_id = $self->add_xref($namekey,"",$namekey, "", $genename_source_id, $species_id,"DEPENDENT");
# $kount++;
# print $namekey."\t";
foreach my $synkey (keys %{$GeneNameSynonym{$namekey}}){
#add synonyms for xref
$self->add_synonym($xref_id, $synkey);
# print "$synkey, ";
}
# print "\n";
}
# print "$kount gene anmes added\n";
print "Added the following dependent xrefs:-\n" if($verbose);
foreach my $key (keys %dependent_xrefs){
print $key."\t".$dependent_xrefs{$key}."\n" if($verbose);
}
return \@xrefs;
#TODO - currently include records from other species - filter on OX line??
......
......@@ -2373,6 +2373,17 @@ prio_descr =
parser = UniProtParser
release_uri =
[source Uniprot_genename]
# Special source used in UniProtParser foir gene names..
name = Uniprot_genename
download = N
order = 20
priority = 1
prio_descr =
parser = UniProtParser
release_uri =
data_uri =
[source Uniprot/SWISSPROT::drosophila_melanogaster]
# Used by drosophila_melanogaster
name = Uniprot/SWISSPROT
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment