Commit 0d19e0bd authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

mapping dev

E.
parent 0a56de8c
......@@ -39,14 +39,23 @@ while ( my $seq1 = $in1->next_seq() ) {
foreach my $link(@dblink) {
if (($link->database eq "EMBL") || ($link->database eq "MIM")) {
if ($link->database eq "EMBL") {
if (!defined $map{$ac}) {
die "Can't map $ac\n";
}
print OUT "$map{$ac}\t$ac\tEMBL_AC\t".$link->primary_id,"\n";
print OUT "$map{$ac}\t$ac\tEMBL_PROT_AC\t".$link->optional_id,"\n";
}
if ($link->database eq "MIM") {
if (!defined $map{$ac}) {
die "Can't map $ac\n";
}
print OUT "$map{$ac}\t$ac\t".$link->database."\t".$link->primary_id,"\n";
print OUT "$map{$ac}\t$ac\tOMIM\t".$link->primary_id,"\n";
}
}
}
......
......@@ -3,20 +3,24 @@ use DBI;
use Getopt::Long;
use Bio::EnsEMBL::DBSQL::DBEntryAdaptor;
use Bio::EnsEMBL::DBEntry;
use Bio::SeqIO;
my %hugosyn;
my %hugosymbol;
my %hugoid;
my %scopsyn;
my %scopid;
my %gene_map;
my %transcript_map;
my ($mapping, $hugosyn, $scopsyn, $out);
my %spid;
my ($mapping, $hugosyn, $scopsyn, $out, $spsyn);
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/ens4.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
&GetOptions(
'mapping:s'=>\$mapping,
'hugosyn:s'=>\$hugosyn,
'scopsyn:s'=>\$scopsyn
'scopsyn:s'=>\$scopsyn,
'spid:s'=>\$spsyn
);
my $dsn = "DBI:mysql:database=ensembl090_tmp;host=ecs1c";
......@@ -25,21 +29,30 @@ my $db = DBI->connect("$dsn",'ensadmin') || die ("Could not connect to db!");
my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
#open (MAPS, "$map");
#while (<MAP>) {
# chomp;
# my ($transcript,$gen
#Read Hugo file to get out synonyms
print STDERR "Getting SP mapping\n";
#my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
#while ( my $seq = $in->next_seq() ) {
my $ac;# = $seq->accession;
my $id;# = $seq->id;
$spid{$ac} = $id;
#}
open (HUGO, "$hugosyn") || die "Can't open file $mapping\n";
while (<HUGO>) {
chomp;
#get red of the cariage return present in Hugos
$_ =~ s/\r//g;
my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_);
my @aliases = split (/, /,$alias);
my @withdrawns = split (/, /,$withdrawn);
$hugosymbol{$symbol}=$hgnc;
$hugoid{$hgnc}=$symbol;
foreach my $al(@aliases) {
push(@{$hugosyn{$symbol}},$al);
......@@ -60,8 +73,15 @@ while (<SCOP>) {
#my $uni = "$pdb||$chain";
push(@{$scopsyn{$scopac}},$pdb);
push(@{$scopsyn{$scopac}},$chain);
#Set up the display id
my $display = $pdb." ".$chain;
push (@{$scopid{$scopac}},$display);
#push(@{$scopsyn{$scopac}},$pdb);
#push(@{$scopsyn{$scopac}},$chain);
#Scop number becomes a synonym (not stable)
push(@{$scopsyn{$scopac}},$scopnb);
}
close (SCOP);
......@@ -70,12 +90,20 @@ close (SCOP);
open (MAPPING, "$mapping") || die "Can't open file $mapping\n";
while (<MAPPING>) {
chomp;
$_ =~ s/\r//g;
my ($ens, $db, $primary_ac) = split(/\t/,$_);
#Get SP mapping
if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
#if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "SCOP2") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
if (($db eq "EMBL") || ($db eq "EC") || ($db eq "OMIM") || ($db eq "REFSEQ") || ($db eq "LOCUS")) {
##############Temporary changes###########################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
##########################################################
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
......@@ -86,27 +114,48 @@ while (<MAPPING>) {
$adaptor->store($dbentry,$ens,"Gene");
}
if ($db eq "HUGOSYMBOL") {
#print STDERR "HERE\n";
if (($db eq "SP") || ($db eq "SPTREMBL")) {
if (!defined $spid{$primary_ac}) {
#print "SP primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
-display_id => $spid{$primary_ac},
-version => 1,
-release => 1,
-dbname => $db );
}
if ($db eq "HUGOID") {
##################Temporary changes#######################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
##########################################################
if (!defined $hugoid{$primary_ac}) {
print "Hugo primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
-display_id => $primary_ac,
-display_id => $hugoid{$primary_ac},
-version => 1,
-release => 1,
-dbname => $db );
if ($hugosyn{$primary_ac}) {
my @synonyms = @{$hugosyn{$primary_ac}};
#print STDERR "SYN: @synonyms\n";
foreach my $syn (@synonyms) {
if ($syn =~ /\S+/) {
#print STDERR "$syn\n";
$dbentry->add_synonym($syn);
}
}
}
......@@ -115,13 +164,21 @@ while (<MAPPING>) {
$adaptor->store($dbentry,$ens,"Gene");
}
if ($db eq "SCOP") {
if ($db eq "SCOP") {
#############tmp########################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
########################################
if (!defined $scopid{$primary_ac}) {
print "SCOP primary Ac ($primary_ac) does not have an id\n";
}
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
-display_id => $primary_ac,
-display_id => $scopid{$primary_ac},
-version => 1,
-release => 1,
-dbname => $db );
......@@ -138,3 +195,6 @@ while (<MAPPING>) {
}
}
......@@ -32,12 +32,12 @@ my ($ens,$sp,$refseq,$pdb);
);
&runpmatch();
&postprocesspmatch($sp);
#&postprocesspmatch($sp);
&postprocesspmatch($refseq);
&postprocesspmatch($pdb);
&finalprocess($sp);
#&postprocesspmatch($pdb);
#&finalprocess($sp);
&finalprocess($refseq);
&finalprocess($pdb);
#&finalprocess($pdb);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
......@@ -45,14 +45,14 @@ sub runpmatch {
print STDERR "Running pmatch\n";
#Run pmatch and store the data in files which will be kept for debugging
my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
#my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my $pmatch2 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $refseq $ens > ens_refseq_rawpmatch";
my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
#my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
#system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system($pmatch2); #== 0 or die "$0\Error running '$pmatch2' : $!";
system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
#system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment