Commit 0a56de8c authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

Few changes, added a script to load the mapping into the database

parent 3048a7ba
......@@ -92,14 +92,6 @@ while (<ENS2>) {
$_ =~ s/\r//g;
my ($hgnc1,$hugo) = split(/\t/,$_);
# if (!defined $hugo_sp{$hgnc1}) {
# print ERROR "Can't map back $hugo_sp{$hgnc} (ENS2)\n";
# }
# if (!defined $hugo_refseq{$hgnc1}) {
# print ERROR "Can't map back $hugo_refseq{$hgnc} (ENS2)\n";
# }
if ($hugo_sp{$hgnc1}) {
print OUT "$map{$hugo_sp{$hgnc1}}\t$hugo_sp{$hgnc1}\tHUGOSYMBOL\t$hugo\n";
}
......
......@@ -39,6 +39,8 @@ my %embl_clone;
'output:s'=>\$out
);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_xrefs.pl -mapping ../map_outputs/totalmap.final -xrefs ../sec_outputs/xrefs.map -dbmap ../sec_outputs/mapdb.map -refseq ../primary/hs.gnp -output final.map
open (DBMAP,"$dbmap") || die "Can't open file $dbmap\n";
open (XREF,"$xrefs") || die "Can't open file $xrefs\n";
open (MAP,"$mapping") || die "Can't open file $mapping\n";
......
use strict;
use DBI;
use Getopt::Long;
use Bio::EnsEMBL::DBSQL::DBEntryAdaptor;
use Bio::EnsEMBL::DBEntry;
# Loads a tab-separated mapping file (id, external db name, primary accession)
# into the database as DBEntry objects, attaching synonyms read from a HUGO
# file (for HUGOSYMBOL entries) and from a SCOP file (for SCOP entries).
#
# Options:
#   -mapping  final mapping file: <COBP id>\t<db name>\t<primary accession>
#   -hugosyn  HUGO file: <hgnc>\t<symbol>\t<aliases>\t<withdrawn symbols>
#             (aliases / withdrawn symbols are ", "-separated lists)
#   -scopsyn  SCOP file: <scop accession>\t<pdb>\t<chain>\t<scop number>

# symbol => [aliases and withdrawn symbols]
my %hugo_synonyms;
# symbol => hgnc id; filled while parsing but not read again in this script.
my %hugosymbol;
# scop accession => [pdb, chain, scop number]
my %scop_synonyms;

my ($mapping_file, $hugosyn_file, $scopsyn_file);

GetOptions(
    'mapping:s' => \$mapping_file,
    'hugosyn:s' => \$hugosyn_file,
    'scopsyn:s' => \$scopsyn_file,
) or die "Usage: $0 -mapping <file> -hugosyn <file> -scopsyn <file>\n";

# Fail early with a clear message instead of trying to open an empty filename.
foreach my $opt ([mapping => $mapping_file],
                 [hugosyn => $hugosyn_file],
                 [scopsyn => $scopsyn_file]) {
    my ($name, $value) = @{$opt};
    die "Missing required option -$name\n"
        unless defined $value && length $value;
}

my $dsn = "DBI:mysql:database=ensembl090_tmp;host=ecs1c";
my $db  = DBI->connect($dsn, 'ensadmin')
    || die "Could not connect to db! "
         . (defined $DBI::errstr ? $DBI::errstr : '') . "\n";

# NOTE(review): the adaptor is constructed directly from the DBI handle, as in
# the original script -- confirm this is what DBEntryAdaptor->new expects.
my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);

# Read Hugo file to get out synonyms
open(my $hugo_fh, '<', $hugosyn_file)
    || die "Can't open file $hugosyn_file: $!\n";
while (my $line = <$hugo_fh>) {
    chomp $line;
    my ($hgnc, $symbol, $alias, $withdrawn) = split(/\t/, $line);

    # Blank or truncated lines carry no symbol to key the synonyms on.
    next unless defined $symbol && length $symbol;

    $hugosymbol{$symbol} = $hgnc;

    # The alias / withdrawn columns may be absent; only split what is there.
    foreach my $list ($alias, $withdrawn) {
        next unless defined $list;
        push @{ $hugo_synonyms{$symbol} }, split(/, /, $list);
    }
}
close($hugo_fh);

# Read SCOP file to get out synonyms
open(my $scop_fh, '<', $scopsyn_file)
    || die "Can't open file $scopsyn_file: $!\n";
while (my $line = <$scop_fh>) {
    chomp $line;
    my ($scopac, $pdb, $chain, $scopnb) = split(/\t/, $line);
    next unless defined $scopac && length $scopac;

    # Missing trailing columns come back undefined; they were never usable
    # as synonyms, so drop them here.
    push @{ $scop_synonyms{$scopac} }, grep { defined } ($pdb, $chain, $scopnb);
}
close($scop_fh);

# Database names in the mapping file that are deliberately not loaded.
my %skipped_db = map { $_ => 1 } qw(SCOP1 HUGOID HUGOALIAS HUGOWITHDRAWN);

# Database names whose entries get synonyms, and where those synonyms live.
my %synonym_source = (
    HUGOSYMBOL => \%hugo_synonyms,
    SCOP       => \%scop_synonyms,
);

# Read final mapping
open(my $map_fh, '<', $mapping_file)
    || die "Can't open file $mapping_file: $!\n";
while (my $line = <$map_fh>) {
    chomp $line;

    # $dbname rather than $db so the DBI handle above is not shadowed.
    my ($ens, $dbname, $primary_ac) = split(/\t/, $line);

    # Skip blank / short lines instead of storing an entry with no accession.
    next unless defined $ens && defined $dbname && defined $primary_ac;
    next if $skipped_db{$dbname};

    # The mapping is keyed on COBP ids; the entry is stored against the
    # COBT id carrying the same number.
    my ($ac1) = $ens =~ /COBP(\d+)/;
    unless (defined $ac1) {
        warn "Skipping line $.: no COBP identifier in '$ens'\n";
        next;
    }
    my $target_id = "COBT" . $ac1;

    my $dbentry = Bio::EnsEMBL::DBEntry->new(
        -adaptor    => $adaptor,
        -primary_id => $primary_ac,
        -display_id => $primary_ac,
        -version    => 1,
        -release    => 1,
        -dbname     => $dbname,
    );

    # Only HUGOSYMBOL and SCOP entries have a synonym table; everything else
    # is stored as a bare xref.
    if (my $table = $synonym_source{$dbname}) {
        foreach my $syn (@{ $table->{$primary_ac} || [] }) {
            # Ignore empty / whitespace-only synonyms.
            $dbentry->add_synonym($syn) if $syn =~ /\S/;
        }
    }

    $adaptor->store($dbentry, $target_id, "Gene");
}
close($map_fh);
......@@ -39,6 +39,8 @@ my ($ens,$sp,$refseq,$pdb);
&finalprocess($refseq);
&finalprocess($pdb);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
sub runpmatch {
print STDERR "Running pmatch\n";
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment