Commit 62da7628 authored by Emmanuel Mongin
Browse files

some minor changes

parent 2c80e62a
...@@ -19,20 +19,30 @@ my %map; ...@@ -19,20 +19,30 @@ my %map;
open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n"; open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n";
open (OUT,">$out") || die "Can't open file\n"; open (OUT,">$out") || die "Can't open file\n";
print STDERR "Reading DBmap\n";
while (<DBMAP>) { while (<DBMAP>) {
chomp; chomp;
my ($mapac,$mapdb) = split(/\t/,$_); my ($mapac,$mapdb) = split(/\t/,$_);
$map{$mapac} = $mapdb; $map{$mapac} = $mapdb;
} }
print STDERR "Reading SP\n";
my $in1 = Bio::SeqIO->new(-file => $sp, '-format' =>'swiss'); my $in1 = Bio::SeqIO->new(-file => $sp, '-format' =>'swiss');
while ( my $seq1 = $in1->next_seq() ) { while ( my $seq1 = $in1->next_seq() ) {
my $ac = $seq1->accession; my $ac = $seq1->accession;
my @dblink = $seq1->annotation->each_DBLink; my @dblink = $seq1->annotation->each_DBLink;
foreach my $link(@dblink) { foreach my $link(@dblink) {
if (($link->database eq "EMBL") || ($link->database eq "MIM")) { if (($link->database eq "EMBL") || ($link->database eq "MIM") || ($link->database eq "PDB")) {
if (!defined $map{$ac}) {
die "Can't map $ac\n";
}
print OUT "$map{$ac}\t$ac\t".$link->database."\t".$link->primary_id,"\n"; print OUT "$map{$ac}\t$ac\t".$link->database."\t".$link->primary_id,"\n";
} }
} }
......
...@@ -58,6 +58,8 @@ while (<ENS1>) { ...@@ -58,6 +58,8 @@ while (<ENS1>) {
#Get rid of the annoying carriage return! #Get rid of the annoying carriage return!
$_ =~ s/\r//g; $_ =~ s/\r//g;
my ($hgnc,$sp,$refseq) = split(/\t/,$_); my ($hgnc,$sp,$refseq) = split(/\t/,$_);
if ($sp) { if ($sp) {
$en1{$sp} = $hgnc; $en1{$sp} = $hgnc;
...@@ -108,10 +110,14 @@ while (<DBMAP>) { ...@@ -108,10 +110,14 @@ while (<DBMAP>) {
foreach my $sol (@syn) { foreach my $sol (@syn) {
#print the HUGOs aliases #print the HUGOs aliases
print OUT "$mapdb\t$mapac\tALIAS\t$sol\n"; print OUT "$mapdb\t$mapac\tHUGO\t$sol\n";
} }
} }
#if (!defined $en2{$en1{$mapac}}) {
# print STDERR "$mapac\n";
#}
} }
} }
......
...@@ -4,7 +4,7 @@ use strict; ...@@ -4,7 +4,7 @@ use strict;
use Getopt::Long; use Getopt::Long;
my ($refseq,$dbmap); my ($refseq,$dbmap,$out);
my %map; my %map;
...@@ -19,6 +19,8 @@ open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n"; ...@@ -19,6 +19,8 @@ open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n";
open (REFSEQ,"$refseq") || die "Can't open file $refseq\n"; open (REFSEQ,"$refseq") || die "Can't open file $refseq\n";
open (OUT,">$out") || die "Can't open file $out"; open (OUT,">$out") || die "Can't open file $out";
print STDERR "Reading dbmap\n";
while (<DBMAP>) { while (<DBMAP>) {
chomp; chomp;
my ($mapac,$mapdb) = split(/\t/,$_); my ($mapac,$mapdb) = split(/\t/,$_);
...@@ -27,9 +29,11 @@ while (<DBMAP>) { ...@@ -27,9 +29,11 @@ while (<DBMAP>) {
} }
#Separate by entry (each entry goes into $_)
$/ = "\/\/\n"; $/ = "\/\/\n";
print STDERR "Reading Refseq file\n";
while (<REFSEQ>) { while (<REFSEQ>) {
my ($prot_ac) = $_ =~ /ACCESSION\s+(\S+)/; my ($prot_ac) = $_ =~ /ACCESSION\s+(\S+)/;
my ($dna_ac) = $_ =~ /DBSOURCE REFSEQ: accession\s+(\w+)/; my ($dna_ac) = $_ =~ /DBSOURCE REFSEQ: accession\s+(\w+)/;
...@@ -39,10 +43,16 @@ while (<REFSEQ>) { ...@@ -39,10 +43,16 @@ while (<REFSEQ>) {
my ($locus) = $_ =~ /\/db_xref=\"LocusID:(\d*)/; my ($locus) = $_ =~ /\/db_xref=\"LocusID:(\d*)/;
if ($mim) { if ($mim) {
if (!defined $map{$dna_ac}) {
die "can't map $dna_ac\n";
}
print OUT "$map{$dna_ac}\t$dna_ac\tOMIM\t$mim\n"; print OUT "$map{$dna_ac}\t$dna_ac\tOMIM\t$mim\n";
} }
if ($locus) { if ($locus) {
if (!defined $map{$dna_ac}) {
die "can't map $dna_ac\n";
}
print OUT "$map{$dna_ac}\t$dna_ac\tLOCUS\t$locus\n"; print OUT "$map{$dna_ac}\t$dna_ac\tLOCUS\t$locus\n";
} }
} }
...@@ -51,3 +61,5 @@ while (<REFSEQ>) { ...@@ -51,3 +61,5 @@ while (<REFSEQ>) {
...@@ -57,13 +57,13 @@ sub postprocesspmatch { ...@@ -57,13 +57,13 @@ sub postprocesspmatch {
#Post process the raw data from pmatch #Post process the raw data from pmatch
if ($db eq $sp) { if ($db eq $sp) {
print STDERR "Postprocessing pmatch for SP mapping\n"; print STDERR "Postprocessing pmatch for SP mapping\n";
open (OUT, ">ens_sp.processed"); open (OUT, ">ens_sp.processed") || die "Can't open File\n";
open (PROC, "ens_sp_rawpmatch"); open (PROC, "ens_sp_rawpmatch") || die "Can't open File\n";
} }
else { else {
print STDERR "Postprocessing pmatch for REFSEQ mapping\n"; print STDERR "Postprocessing pmatch for REFSEQ mapping\n";
open (OUT, ">ens_refseq.processed"); open (OUT, ">ens_refseq.processed") || die "Can't open File\n";;
open (PROC, "ens_refseq_rawpmatch") || die "Can't open file ens_refseq_rawpmatch\n"; open (PROC, "ens_refseq_rawpmatch") || die "Can't open file ens_refseq_rawpmatch\n";
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment