Commit f702f9ca authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

Still removing ....

parent 738b6b16
use strict;
=head1 get_all_external
=head2 Description
This script calls all of the other scripts involved in the mapping of cross references.
=head2 Options
-input: Full pathname for the directory where the input files are stored.
-output: Full pathname for the directory where the output files should be stored
=head2 Filenames
The filenames of the input files are hardcoded on purpose. The input files should have the same names as the ones hardcoded in this script.
=head2 Contact
mongin@ebi.ac.uk
birney@ebi.ac.uk
=cut
use Getopt::Long;
# Command-line options. NOTE: -input and -output are parsed but not yet used
# by any of the commands below; the file names are still hardcoded.
my ($input,$output);
GetOptions(
    'input:s'  => \$input,
    'output:s' => \$output,
);

##########################################
#First should run the script to get DBMAP#
##########################################
# TODO: prefix the hardcoded file names with $input and $output.

# Each step runs one helper script through the shell and aborts the whole
# pipeline if that script exits with a non-zero status.
# The error messages used to read "$0\Error ..."; inside a double-quoted
# string "\E" is an escape that is silently swallowed, so the text came out
# as "<script>rror running". They now use "$0: Error ...".

print STDERR "Getting external DBs (EMBL and MIM) from Swiss-Prot\n";
my $get_embl_mim = "perl get_embl_mim_mapping.pl -sp sp_trembl.swiss -dbmap dbmap.extmap -output embl_mim.extmap";
system($get_embl_mim) == 0 or die "$0: Error running '$get_embl_mim'";

print STDERR "Getting external DBs (MIM and LOCUS) from Refseq\n";
my $get_mim_locus = "perl get_embl_mim_mapping.pl -refseq refseq.genbank -dbmap dbmap.extmap -output mim_locus.extmap";
system($get_mim_locus) == 0 or die "$0: Error running '$get_mim_locus'";

print STDERR "Getting HUGOs using HUGO mapping with SP and Refseq\n";
my $get_hugo = "perl get_hugo_mapping.pl -nomeid nomeids.txt -ens1 ens1.txt -ens2 ens2.txt -dbmap dbmap.extmap -output hugo.extmap";
system($get_hugo) == 0 or die "$0: Error running '$get_hugo'";
use strict;
use DBI;
use Getopt::Long;
use Bio::EnsEMBL::DBSQL::DBEntryAdaptor;
use Bio::EnsEMBL::DBEntry;
use Bio::SeqIO;
# Lookup tables filled while reading the secondary files:
#   %hugoid  : HGNC id        -> HUGO symbol
#   %hugosyn : HGNC id        -> array ref of aliases
#   %scopid  : SCOP accession -> display id ("<pdb> <chain>")
#   %scopsyn : SCOP accession -> array ref of synonyms (SCOP number)
#   %spid    : SP accession   -> SP identifier
my %hugosyn;
my %hugoid;
my %scopsyn;
my %scopid;
# NOTE(review): %gene_map, %transcript_map and $out are not used by the code
# visible in this script; kept in case something else relies on them.
my %gene_map;
my %transcript_map;
my %spid;
my ($mapping, $hugosyns, $scopsyn, $out, $spsyn);

#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/nomeid.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
my $usage = "Usage: $0 -mapping <file> -hugosyn <file> -scopsyn <file> -spid <file>\n";

GetOptions(
    'mapping:s' => \$mapping,
    'hugosyn:s' => \$hugosyns,
    'scopsyn:s' => \$scopsyn,
    'spid:s'    => \$spsyn,
) or die $usage;

# All four files are read further down, so fail early with a clear message
# instead of a confusing "Can't open file" later on.
my %required = (
    mapping => $mapping,
    hugosyn => $hugosyns,
    scopsyn => $scopsyn,
    spid    => $spsyn,
);
foreach my $opt (sort keys %required) {
    my $value = $required{$opt};
    die "Missing required option -$opt\n$usage"
        unless defined $value && length $value;
}
# Connect to the (hardcoded) xref database as user 'ensadmin' with no
# password and wrap the handle in a DBEntryAdaptor used to store the entries.
my $dsn = "DBI:mysql:database=xrefs100_tmp;host=ecs1c";
my $db = DBI->connect($dsn, 'ensadmin')
    or die "Could not connect to db ($dsn): $DBI::errstr";
my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
# Fill %spid (accession -> identifier) from the Swiss-Prot format file
# given with -spid.
print STDERR "Getting SP mapping\n";
my $in = Bio::SeqIO->new(
    -file   => $spsyn,
    -format => 'swiss',
);
while (1) {
    # next_seq returns a false value once the file is exhausted.
    my $entry = $in->next_seq() or last;
    my $accession  = $entry->accession;
    my $identifier = $entry->id;
    $spid{$accession} = $identifier;
}
# Read the HUGO file given with -hugosyn (tab separated).
# Column 0 is the HGNC id, column 1 the symbol and column 8 a ", "-separated
# list of aliases. Fills %hugoid (id -> symbol) and %hugosyn (id -> aliases).
# The error message used to name $mapping, which is the wrong file.
open(my $hugo_fh, '<', $hugosyns)
    or die "Can't open file $hugosyns: $!\n";
while (my $line = <$hugo_fh>) {
    chomp $line;
    # Get rid of the carriage returns present in the HUGO file.
    $line =~ s/\r//g;

    my @fields = split(/\t/, $line);
    my $hgnc   = $fields[0];
    my $symbol = $fields[1];
    my $alias  = $fields[8];

    # Blank lines yield no fields at all; nothing to record.
    next unless defined $hgnc;

    $hugoid{$hgnc} = $symbol;

    # The alias column is absent on short lines; only split it when present,
    # and only create the %hugosyn entry when there is at least one alias.
    my @aliases = defined $alias ? split(/, /, $alias) : ();
    push(@{ $hugosyn{$hgnc} }, @aliases) if @aliases;
}
close($hugo_fh);
# Read the SCOP file given with -scopsyn (tab separated: SCOP accession,
# PDB code, chain, SCOP number).
# Fills %scopid (accession -> "<pdb> <chain>", used as display id) and
# %scopsyn (accession -> list of SCOP numbers, used as synonyms).
open(my $scop_fh, '<', $scopsyn)
    or die "Can't open file $scopsyn: $!\n";
while (my $line = <$scop_fh>) {
    chomp $line;
    my ($scopac, $pdb, $chain, $scopnb) = split(/\t/, $line);

    # Skip blank lines instead of recording an entry under the empty key.
    next unless defined $scopac && length $scopac;

    # Set up the display id.
    $scopid{$scopac} = $pdb." ".$chain;

    # Scop number becomes a synonym (not stable).
    push(@{ $scopsyn{$scopac} }, $scopnb);
}
close($scop_fh);
# Build one DBEntry and store it against an Ensembl translation.
#   $adaptor    - DBEntryAdaptor, set as the entry's adaptor and used to store it
#   $ens        - Ensembl identifier (first column of the mapping file)
#   $dbname     - external database name (second column)
#   $primary_ac - primary accession (third column)
#   $display_id - display identifier for the entry; may be undef
#   $synonyms   - optional array ref of synonyms; empty/blank ones are skipped
sub _store_xref {
    my ($adaptor, $ens, $dbname, $primary_ac, $display_id, $synonyms) = @_;

    my $dbentry = Bio::EnsEMBL::DBEntry->new(
        -adaptor    => $adaptor,
        -primary_id => $primary_ac,
        -display_id => $display_id,
        -version    => 1,
        -release    => 1,
        -dbname     => $dbname,
    );

    foreach my $syn (@{ $synonyms || [] }) {
        $dbentry->add_synonym($syn) if defined $syn && $syn =~ /\S+/;
    }

    $adaptor->store($dbentry, $ens, "Translation");
    return;
}

# Databases whose display id is simply the primary accession.
my %plain_xref_db = map { $_ => 1 }
    qw(EMBL_AC EMBL_PROT_AC EC OMIM REFSEQ LOCUS);

# Read the final mapping (tab separated: Ensembl id, database name, primary
# accession) and store one xref per recognised line. Lines for any other
# database name are ignored.
open(my $map_fh, '<', $mapping)
    or die "Can't open file $mapping: $!\n";
while (my $line = <$map_fh>) {
    chomp $line;
    $line =~ s/\r//g;
    next unless $line =~ /\S/;

    # $dbname used to be called $db, which shadowed the DBI handle.
    my ($ens, $dbname, $primary_ac) = split(/\t/, $line);
    unless (defined $primary_ac) {
        print STDERR "Skipping malformed mapping line: $line\n";
        next;
    }

    if ($plain_xref_db{$dbname}) {
        _store_xref($adaptor, $ens, $dbname, $primary_ac, $primary_ac);
    }
    elsif ($dbname eq "SP" || $dbname eq "SPTREMBL") {
        # An accession missing from %spid is stored with an undefined
        # display id, as before (the original diagnostic was commented out).
        _store_xref($adaptor, $ens, $dbname, $primary_ac, $spid{$primary_ac});
    }
    elsif ($dbname eq "HUGOID") {
        if (!defined $hugoid{$primary_ac}) {
            print "Hugo primary Ac ($primary_ac) does not have an id\n";
        }
        _store_xref($adaptor, $ens, $dbname, $primary_ac,
                    $hugoid{$primary_ac}, $hugosyn{$primary_ac});
    }
    elsif ($dbname eq "SCOP") {
        my $dspl = $scopid{$primary_ac};
        if (!defined $dspl) {
            print "SCOP primary Ac ($primary_ac) does not have an id\n";
            # Fall back to characters 2-5 of the first run of word characters.
            ($dspl) = $primary_ac =~ /\w{1}(\w{4})/;
        }
        _store_xref($adaptor, $ens, $dbname, $primary_ac,
                    $dspl, $scopsyn{$primary_ac});
    }
}
close($map_fh);
use strict;
=head1 map_ensembl2external.pl
=head2 Description
This script runs pmatch, post-processes its output, and combines the pmatch results with the external mapping to produce a file ready to go into TranscriptDBlink.
=head2 Options
-input: Full pathname for the directory where the input files are stored.
-output: Full pathname for the directory where the output files should be stored
=head2 Filenames
The filenames of the input files are hardcoded on purpose. The input files should have the same names as the ones hardcoded in this script.
=head2 Contact
mongin@ebi.ac.uk
birney@ebi.ac.uk
=cut
use Getopt::Long;
# Command-line options. NOTE: -input and -output are parsed but not yet used
# by the commands below; the file names are still hardcoded.
my ($input,$output);
GetOptions(
    'input:s'  => \$input,
    'output:s' => \$output,
);

# TODO: prefix the hardcoded file names with $input and $output.

# Each step runs one helper script through the shell and aborts if it exits
# with a non-zero status. The messages used to read "$0\Error ..."; "\E" is
# an escape inside double quotes and was swallowed, giving "<script>rror".

print STDERR "Running script process_pmatch.pl\n";
#The output has to be directed in process_pmatch
my $process_pmatch = "perl process_pmatch.pl -ens ensembl.fas -sp sp_trembl.fas -refseq refseq.fas";
system($process_pmatch) == 0 or die "$0: Error running '$process_pmatch'";

print STDERR "Running script get_xrefs.pl\n";
# The assignment operator was missing here, which made the script fail to
# compile.
# TODO: "-refseq ????" is a placeholder left by the original author; the real
# file name still has to be filled in (the shell treats ???? as a glob).
my $get_xrefs = "perl get_xrefs.pl -mapping ens_mapping.map -xrefs ext_mapping.extmap -dbmap dbmap.extmap -refseq ???? -output transcriptdblink.txt";
system($get_xrefs) == 0 or die "$0: Error running '$get_xrefs'";
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment