Commit 0d19e0bd authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

mapping dev

E.
parent 0a56de8c
...@@ -39,14 +39,23 @@ while ( my $seq1 = $in1->next_seq() ) { ...@@ -39,14 +39,23 @@ while ( my $seq1 = $in1->next_seq() ) {
foreach my $link(@dblink) { foreach my $link(@dblink) {
if (($link->database eq "EMBL") || ($link->database eq "MIM")) { if ($link->database eq "EMBL") {
if (!defined $map{$ac}) {
die "Can't map $ac\n";
}
print OUT "$map{$ac}\t$ac\tEMBL_AC\t".$link->primary_id,"\n";
print OUT "$map{$ac}\t$ac\tEMBL_PROT_AC\t".$link->optional_id,"\n";
}
if ($link->database eq "MIM") {
if (!defined $map{$ac}) { if (!defined $map{$ac}) {
die "Can't map $ac\n"; die "Can't map $ac\n";
} }
print OUT "$map{$ac}\t$ac\t".$link->database."\t".$link->primary_id,"\n"; print OUT "$map{$ac}\t$ac\tOMIM\t".$link->primary_id,"\n";
} }
} }
} }
......
...@@ -3,43 +3,56 @@ use DBI; ...@@ -3,43 +3,56 @@ use DBI;
use Getopt::Long; use Getopt::Long;
use Bio::EnsEMBL::DBSQL::DBEntryAdaptor; use Bio::EnsEMBL::DBSQL::DBEntryAdaptor;
use Bio::EnsEMBL::DBEntry; use Bio::EnsEMBL::DBEntry;
use Bio::SeqIO;
my %hugosyn; my %hugosyn;
my %hugosymbol; my %hugoid;
my %scopsyn; my %scopsyn;
my %scopid;
my %gene_map; my %gene_map;
my %transcript_map; my %transcript_map;
my ($mapping, $hugosyn, $scopsyn, $out); my %spid;
my ($mapping, $hugosyn, $scopsyn, $out, $spsyn);
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/ens4.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
&GetOptions( &GetOptions(
'mapping:s'=>\$mapping, 'mapping:s'=>\$mapping,
'hugosyn:s'=>\$hugosyn, 'hugosyn:s'=>\$hugosyn,
'scopsyn:s'=>\$scopsyn 'scopsyn:s'=>\$scopsyn,
'spid:s'=>\$spsyn
); );
my $dsn = "DBI:mysql:database=ensembl090_tmp;host=ecs1c"; my $dsn = "DBI:mysql:database=ensembl090_tmp;host=ecs1c";
my $db = DBI->connect("$dsn",'ensadmin') || die ("Could not connect to db!"); my $db = DBI->connect("$dsn",'ensadmin') || die ("Could not connect to db!");
my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db); my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
print STDERR "Getting SP mapping\n";
#my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
#while ( my $seq = $in->next_seq() ) {
my $ac;# = $seq->accession;
my $id;# = $seq->id;
$spid{$ac} = $id;
#}
#open (MAPS, "$map");
#while (<MAP>) {
# chomp;
# my ($transcript,$gen
#Read Hugo file to get out synonyms
open (HUGO, "$hugosyn") || die "Can't open file $mapping\n"; open (HUGO, "$hugosyn") || die "Can't open file $mapping\n";
while (<HUGO>) { while (<HUGO>) {
chomp; chomp;
#get red of the cariage return present in Hugos
$_ =~ s/\r//g;
my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_); my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_);
my @aliases = split (/, /,$alias); my @aliases = split (/, /,$alias);
my @withdrawns = split (/, /,$withdrawn); my @withdrawns = split (/, /,$withdrawn);
$hugosymbol{$symbol}=$hgnc; $hugoid{$hgnc}=$symbol;
foreach my $al(@aliases) { foreach my $al(@aliases) {
push(@{$hugosyn{$symbol}},$al); push(@{$hugosyn{$symbol}},$al);
...@@ -60,8 +73,15 @@ while (<SCOP>) { ...@@ -60,8 +73,15 @@ while (<SCOP>) {
#my $uni = "$pdb||$chain"; #my $uni = "$pdb||$chain";
push(@{$scopsyn{$scopac}},$pdb); #Set up the display id
push(@{$scopsyn{$scopac}},$chain); my $display = $pdb." ".$chain;
push (@{$scopid{$scopac}},$display);
#push(@{$scopsyn{$scopac}},$pdb);
#push(@{$scopsyn{$scopac}},$chain);
#Scop number becomes a synonym (not stable)
push(@{$scopsyn{$scopac}},$scopnb); push(@{$scopsyn{$scopac}},$scopnb);
} }
close (SCOP); close (SCOP);
...@@ -70,12 +90,20 @@ close (SCOP); ...@@ -70,12 +90,20 @@ close (SCOP);
open (MAPPING, "$mapping") || die "Can't open file $mapping\n"; open (MAPPING, "$mapping") || die "Can't open file $mapping\n";
while (<MAPPING>) { while (<MAPPING>) {
chomp; chomp;
$_ =~ s/\r//g;
my ($ens, $db, $primary_ac) = split(/\t/,$_); my ($ens, $db, $primary_ac) = split(/\t/,$_);
#Get SP mapping #Get SP mapping
if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) { #if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "SCOP2") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
if (($db eq "EMBL") || ($db eq "EC") || ($db eq "OMIM") || ($db eq "REFSEQ") || ($db eq "LOCUS")) {
##############Temporary changes###########################
my ($ac1) = $ens =~ /COBP(\d+)/; my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1"; $ens = "COBT"."$ac1";
##########################################################
my $dbentry = Bio::EnsEMBL::DBEntry->new my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor, ( -adaptor => $adaptor,
-primary_id => $primary_ac, -primary_id => $primary_ac,
...@@ -85,28 +113,49 @@ while (<MAPPING>) { ...@@ -85,28 +113,49 @@ while (<MAPPING>) {
-dbname => $db ); -dbname => $db );
$adaptor->store($dbentry,$ens,"Gene"); $adaptor->store($dbentry,$ens,"Gene");
} }
if (($db eq "SP") || ($db eq "SPTREMBL")) {
if (!defined $spid{$primary_ac}) {
#print "SP primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
-display_id => $spid{$primary_ac},
-version => 1,
-release => 1,
-dbname => $db );
}
if ($db eq "HUGOID") {
if ($db eq "HUGOSYMBOL") { ##################Temporary changes#######################
#print STDERR "HERE\n";
my ($ac1) = $ens =~ /COBP(\d+)/; my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1"; $ens = "COBT"."$ac1";
##########################################################
if (!defined $hugoid{$primary_ac}) {
print "Hugo primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor, ( -adaptor => $adaptor,
-primary_id => $primary_ac, -primary_id => $primary_ac,
-display_id => $primary_ac, -display_id => $hugoid{$primary_ac},
-version => 1, -version => 1,
-release => 1, -release => 1,
-dbname => $db ); -dbname => $db );
if ($hugosyn{$primary_ac}) { if ($hugosyn{$primary_ac}) {
my @synonyms = @{$hugosyn{$primary_ac}}; my @synonyms = @{$hugosyn{$primary_ac}};
#print STDERR "SYN: @synonyms\n";
foreach my $syn (@synonyms) { foreach my $syn (@synonyms) {
if ($syn =~ /\S+/) { if ($syn =~ /\S+/) {
#print STDERR "$syn\n";
$dbentry->add_synonym($syn); $dbentry->add_synonym($syn);
} }
} }
} }
...@@ -115,13 +164,21 @@ while (<MAPPING>) { ...@@ -115,13 +164,21 @@ while (<MAPPING>) {
$adaptor->store($dbentry,$ens,"Gene"); $adaptor->store($dbentry,$ens,"Gene");
} }
if ($db eq "SCOP") { if ($db eq "SCOP") {
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1"; #############tmp########################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
########################################
if (!defined $scopid{$primary_ac}) {
print "SCOP primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor, ( -adaptor => $adaptor,
-primary_id => $primary_ac, -primary_id => $primary_ac,
-display_id => $primary_ac, -display_id => $scopid{$primary_ac},
-version => 1, -version => 1,
-release => 1, -release => 1,
-dbname => $db ); -dbname => $db );
...@@ -138,3 +195,6 @@ while (<MAPPING>) { ...@@ -138,3 +195,6 @@ while (<MAPPING>) {
} }
} }
...@@ -32,12 +32,12 @@ my ($ens,$sp,$refseq,$pdb); ...@@ -32,12 +32,12 @@ my ($ens,$sp,$refseq,$pdb);
); );
&runpmatch(); &runpmatch();
&postprocesspmatch($sp); #&postprocesspmatch($sp);
&postprocesspmatch($refseq); &postprocesspmatch($refseq);
&postprocesspmatch($pdb); #&postprocesspmatch($pdb);
&finalprocess($sp); #&finalprocess($sp);
&finalprocess($refseq); &finalprocess($refseq);
&finalprocess($pdb); #&finalprocess($pdb);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas #perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
...@@ -45,14 +45,14 @@ sub runpmatch { ...@@ -45,14 +45,14 @@ sub runpmatch {
print STDERR "Running pmatch\n"; print STDERR "Running pmatch\n";
#Run pmatch and store the data in files which will be kept for debugging #Run pmatch and store the data in files which will be kept for debugging
my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch"; #my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my $pmatch2 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $refseq $ens > ens_refseq_rawpmatch"; my $pmatch2 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $refseq $ens > ens_refseq_rawpmatch";
my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch"; #my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!"; #system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system($pmatch2); #== 0 or die "$0\Error running '$pmatch2' : $!"; system($pmatch2); #== 0 or die "$0\Error running '$pmatch2' : $!";
system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!"; #system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment