Commit f4359138 authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

Set of script with the changes I've used for the mapping

parent 054348b2
......@@ -45,7 +45,9 @@ while ( my $seq1 = $in1->next_seq() ) {
}
print OUT "$map{$ac}\t$ac\tEMBL_AC\t".$link->primary_id,"\n";
print OUT "$map{$ac}\t$ac\tEMBL_PROT_AC\t".$link->optional_id,"\n";
my ($protac) = $link->optional_id =~ /^(\w+).\S+/;
print OUT "$map{$ac}\t$ac\tEMBL_PROT_AC\t$protac\n";
}
if ($link->database eq "MIM") {
if (!defined $map{$ac}) {
......@@ -57,6 +59,8 @@ while ( my $seq1 = $in1->next_seq() ) {
}
}
......
......@@ -39,7 +39,7 @@ my %embl_clone;
'output:s'=>\$out
);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_xrefs.pl -mapping ../map_outputs/totalmap.final -xrefs ../sec_outputs/xrefs.map -dbmap ../sec_outputs/mapdb.map -refseq ../primary/hs.gnp -output final.map
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_xrefs.pl -mapping ../map_outputs/map.total -xrefs ../sec_outputs/xref.total -dbmap ../sec_outputs/mapdb.map -refseq ../primary/hs.gnp -output final.map
open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n";
open (XREF,"$xrefs") || die "Can't open file $xrefs\n";
......@@ -52,12 +52,12 @@ open (OUT,">$out") || die "Can't open file $out\n";
#open (CLONE,"clones.txt") || die "Can't open file\n";
#Put in a hash all of the embl clones used by Ensembl
while (<CLONE>) {
chomp;
my ($embl_ac,$id) = split(/\t/,$_);
print "$embl_ac\n";
$embl_clone{$embl_ac}=1;
}
#while (<CLONE>) {
# chomp;
# my ($embl_ac,$id) = split(/\t/,$_);
# print "$embl_ac\n";
# $embl_clone{$embl_ac}=1;
#}
while (<DBMAP>) {
......@@ -117,7 +117,9 @@ while (<MAP>) {
#P01111 COBP00000000001 100 PRIMARY
my ($xr,$ens,$perc,$tag) = split (/\t/,$_);
if ($xr =~ /^\w+-\d{2}/) {
($xr) = $xr =~ /^(\w+)-\d{2}/;
}
#Hack to be taken away
#my ($en1,$en2) = $ens =~ /(\w{3})P(\d+)/;
#my $enst = $en1."T".$en2;
......@@ -161,10 +163,18 @@ while (<MAP>) {
#Print the know gene AC and its database
#print OUT "$ens\t$map{$xr}\t$xr\n";
#}
if (!defined $map{$xr}) {
print STDERR "can't find primary $xr\n";
}
print OUT "$ens\t$map{$xr}\t$xr\n";
#Print all of the external database it links to (eg: HUGO)
foreach my $both (@{$hash{$xr}}){
if (!defined $hash{$xr}) {
print STDERR "can't find Xref $xr\n";
}
($a,$b) = split(/&/,$both);
print OUT "$ens\t$a\t$b\n";
}
......
......@@ -12,18 +12,18 @@ my %scopid;
my %gene_map;
my %transcript_map;
my %spid;
my ($mapping, $hugosyn, $scopsyn, $out, $spsyn);
my ($mapping, $hugosyns, $scopsyn, $out, $spsyn);
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/ens4.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/nomeid.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
&GetOptions(
'mapping:s'=>\$mapping,
'hugosyn:s'=>\$hugosyn,
'hugosyn:s'=>\$hugosyns,
'scopsyn:s'=>\$scopsyn,
'spid:s'=>\$spsyn
);
my $dsn = "DBI:mysql:database=ensembl090_tmp;host=ecs1c";
my $dsn = "DBI:mysql:database=xrefs100_tmp;host=ecs1c";
my $db = DBI->connect("$dsn",'ensadmin') || die ("Could not connect to db!");
my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
......@@ -32,35 +32,42 @@ my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
print STDERR "Getting SP mapping\n";
#my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
#while ( my $seq = $in->next_seq() ) {
my $ac;# = $seq->accession;
my $id;# = $seq->id;
while ( my $seq = $in->next_seq() ) {
my $ac = $seq->accession;
my $id = $seq->id;
$spid{$ac} = $id;
#}
}
open (HUGO, "$hugosyn") || die "Can't open file $mapping\n";
open (HUGO, "$hugosyns") || die "Can't open file $mapping\n";
while (<HUGO>) {
chomp;
#get red of the cariage return present in Hugos
$_ =~ s/\r//g;
my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_);
#my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_);
my @hug = split (/\t/,$_);
my $hgnc = $hug[0];
my $symbol = $hug[1];
my $alias = $hug[8];
my @aliases = split (/, /,$alias);
my @withdrawns = split (/, /,$withdrawn);
#my @withdrawns = split (/, /,$withdrawn);
$hugoid{$hgnc}=$symbol;
foreach my $al(@aliases) {
push(@{$hugosyn{$symbol}},$al);
}
foreach my $wi(@withdrawns) {
push(@{$hugosyn{$symbol}},$wi);
push(@{$hugosyn{$hgnc}},$al);
}
#foreach my $wi(@withdrawns) {
#push(@{$hugosyn{$symbol}},$wi);
#}
}
close (HUGO);
......@@ -76,7 +83,9 @@ while (<SCOP>) {
#Set up the display id
my $display = $pdb." ".$chain;
push (@{$scopid{$scopac}},$display);
#push (@{$scopid{$scopac}},$display);
$scopid{$scopac} = $display;
#push(@{$scopsyn{$scopac}},$pdb);
#push(@{$scopsyn{$scopac}},$chain);
......@@ -95,12 +104,12 @@ while (<MAPPING>) {
#Get SP mapping
#if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "SCOP2") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
if (($db eq "EMBL") || ($db eq "EC") || ($db eq "OMIM") || ($db eq "REFSEQ") || ($db eq "LOCUS")) {
if (($db eq "EMBL_AC") || ($db eq "EMBL_PROT_AC") || ($db eq "EC") || ($db eq "OMIM") || ($db eq "REFSEQ") || ($db eq "LOCUS")) {
##############Temporary changes###########################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
#my ($ac1) = $ens =~ /COBP(\d+)/;
#$ens = "COBT"."$ac1";
##########################################################
......@@ -111,7 +120,7 @@ while (<MAPPING>) {
-version => 1,
-release => 1,
-dbname => $db );
$adaptor->store($dbentry,$ens,"Gene");
$adaptor->store($dbentry,$ens,"Translation");
}
......@@ -129,13 +138,14 @@ while (<MAPPING>) {
-version => 1,
-release => 1,
-dbname => $db );
$adaptor->store($dbentry,$ens,"Translation");
}
if ($db eq "HUGOID") {
##################Temporary changes#######################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
#my ($ac1) = $ens =~ /COBP(\d+)/;
#$ens = "COBT"."$ac1";
##########################################################
if (!defined $hugoid{$primary_ac}) {
......@@ -150,8 +160,12 @@ while (<MAPPING>) {
-version => 1,
-release => 1,
-dbname => $db );
if ($hugosyn{$primary_ac}) {
my @synonyms = @{$hugosyn{$primary_ac}};
#print STDERR "SYN: @synonyms\t$primary_ac\n";
foreach my $syn (@synonyms) {
if ($syn =~ /\S+/) {
......@@ -161,24 +175,27 @@ while (<MAPPING>) {
}
$adaptor->store($dbentry,$ens,"Gene");
$adaptor->store($dbentry,$ens,"Translation");
}
if ($db eq "SCOP") {
if ($db eq "SCOP") {
my $dspl;
#############tmp########################
my ($ac1) = $ens =~ /COBP(\d+)/;
$ens = "COBT"."$ac1";
#my ($ac1) = $ens =~ /COBP(\d+)/;
#$ens = "COBT"."$ac1";
########################################
$dspl = $scopid{$primary_ac};
if (!defined $scopid{$primary_ac}) {
print "SCOP primary Ac ($primary_ac) does not have an id\n";
($dspl) = $primary_ac =~ /\w{1}(\w{4})/;
}
my $dbentry = Bio::EnsEMBL::DBEntry->new
( -adaptor => $adaptor,
-primary_id => $primary_ac,
-display_id => $scopid{$primary_ac},
-display_id => $dspl,
-version => 1,
-release => 1,
-dbname => $db );
......@@ -190,7 +207,7 @@ if ($db eq "SCOP") {
}
}
}
$adaptor->store($dbentry,$ens,"Gene");
$adaptor->store($dbentry,$ens,"Translation");
}
......@@ -198,3 +215,10 @@ if ($db eq "SCOP") {
......@@ -32,27 +32,27 @@ my ($ens,$sp,$refseq,$pdb);
);
&runpmatch();
#&postprocesspmatch($sp);
&postprocesspmatch($sp);
&postprocesspmatch($refseq);
#&postprocesspmatch($pdb);
#&finalprocess($sp);
&postprocesspmatch($pdb);
&finalprocess($sp);
&finalprocess($refseq);
#&finalprocess($pdb);
&finalprocess($pdb);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/SPAN_pepfile -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop_human.fas
sub runpmatch {
print STDERR "Running pmatch\n";
#Run pmatch and store the data in files which will be kept for debugging
#my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my $pmatch2 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $refseq $ens > ens_refseq_rawpmatch";
#my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
#system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system($pmatch2); #== 0 or die "$0\Error running '$pmatch2' : $!";
#system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment