Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
0d19e0bd
Commit
0d19e0bd
authored
Apr 19, 2001
by
Emmanuel Mongin
Browse files
mapping dev
E.
parent
0a56de8c
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
105 additions
and
36 deletions
+105
-36
misc-scripts/protein_match/get_embl_mim_mapping.pl
misc-scripts/protein_match/get_embl_mim_mapping.pl
+12
-3
misc-scripts/protein_match/load_mapping.pl
misc-scripts/protein_match/load_mapping.pl
+85
-25
misc-scripts/protein_match/process_pmach.pl
misc-scripts/protein_match/process_pmach.pl
+8
-8
No files found.
misc-scripts/protein_match/get_embl_mim_mapping.pl
View file @
0d19e0bd
...
...
@@ -39,14 +39,23 @@ while ( my $seq1 = $in1->next_seq() ) {
foreach
my
$link
(
@dblink
)
{
if
((
$link
->
database
eq
"
EMBL
")
||
(
$link
->
database
eq
"
MIM
"))
{
if
(
$link
->
database
eq
"
EMBL
")
{
if
(
!
defined
$map
{
$ac
})
{
die
"
Can't map
$ac
\n
";
}
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
EMBL_AC
\t
"
.
$link
->
primary_id
,"
\n
";
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
EMBL_PROT_AC
\t
"
.
$link
->
optional_id
,"
\n
";
}
if
(
$link
->
database
eq
"
MIM
")
{
if
(
!
defined
$map
{
$ac
})
{
die
"
Can't map
$ac
\n
";
}
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
"
.
$link
->
database
.
"
\t
"
.
$link
->
primary_id
,"
\n
";
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
OMIM
\t
"
.
$link
->
primary_id
,"
\n
";
}
}
}
...
...
misc-scripts/protein_match/load_mapping.pl
View file @
0d19e0bd
...
...
@@ -3,20 +3,24 @@ use DBI;
use
Getopt::
Long
;
use
Bio::EnsEMBL::DBSQL::
DBEntryAdaptor
;
use
Bio::EnsEMBL::
DBEntry
;
use
Bio::
SeqIO
;
my
%hugosyn
;
my
%hugo
symbol
;
my
%hugo
id
;
my
%scopsyn
;
my
%scopid
;
my
%gene_map
;
my
%transcript_map
;
my
(
$mapping
,
$hugosyn
,
$scopsyn
,
$out
)
;
my
%spid
;
my
(
$mapping
,
$hugosyn
,
$scopsyn
,
$out
,
$spsyn
);
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/ens4.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
&GetOptions
(
'
mapping:s
'
=>\
$mapping
,
'
hugosyn:s
'
=>\
$hugosyn
,
'
scopsyn:s
'
=>\
$scopsyn
'
scopsyn:s
'
=>\
$scopsyn
,
'
spid:s
'
=>\
$spsyn
);
my
$dsn
=
"
DBI:mysql:database=ensembl090_tmp;host=ecs1c
";
...
...
@@ -25,21 +29,30 @@ my $db = DBI->connect("$dsn",'ensadmin') || die ("Could not connect to db!");
my
$adaptor
=
Bio::EnsEMBL::DBSQL::
DBEntryAdaptor
->
new
(
$db
);
#open (MAPS, "$map");
#while (<MAP>) {
# chomp;
# my ($transcript,$gen
#Read Hugo file to get out synonyms
print
STDERR
"
Getting SP mapping
\n
";
#my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
#while ( my $seq = $in->next_seq() ) {
my
$ac
;
# = $seq->accession;
my
$id
;
# = $seq->id;
$spid
{
$ac
}
=
$id
;
#}
open
(
HUGO
,
"
$hugosyn
")
||
die
"
Can't open file
$mapping
\n
";
while
(
<
HUGO
>
)
{
chomp
;
#get red of the cariage return present in Hugos
$_
=~
s/\r//g
;
my
(
$hgnc
,
$symbol
,
$alias
,
$withdrawn
)
=
split
(
/\t/
,
$_
);
my
@aliases
=
split
(
/, /
,
$alias
);
my
@withdrawns
=
split
(
/, /
,
$withdrawn
);
$hugo
symbol
{
$symbol
}
=
$hgnc
;
$hugo
id
{
$hgnc
}
=
$symbol
;
foreach
my
$al
(
@aliases
)
{
push
(
@
{
$hugosyn
{
$symbol
}},
$al
);
...
...
@@ -60,8 +73,15 @@ while (<SCOP>) {
#my $uni = "$pdb||$chain";
push
(
@
{
$scopsyn
{
$scopac
}},
$pdb
);
push
(
@
{
$scopsyn
{
$scopac
}},
$chain
);
#Set up the display id
my
$display
=
$pdb
.
"
"
.
$chain
;
push
(
@
{
$scopid
{
$scopac
}},
$display
);
#push(@{$scopsyn{$scopac}},$pdb);
#push(@{$scopsyn{$scopac}},$chain);
#Scop number becomes a synonym (not stable)
push
(
@
{
$scopsyn
{
$scopac
}},
$scopnb
);
}
close
(
SCOP
);
...
...
@@ -70,12 +90,20 @@ close (SCOP);
open
(
MAPPING
,
"
$mapping
")
||
die
"
Can't open file
$mapping
\n
";
while
(
<
MAPPING
>
)
{
chomp
;
$_
=~
s/\r//g
;
my
(
$ens
,
$db
,
$primary_ac
)
=
split
(
/\t/
,
$_
);
#Get SP mapping
if
((
$db
ne
"
HUGOSYMBOL
")
&&
(
$db
ne
"
SCOP
")
&&
(
$db
ne
"
SCOP1
")
&&
(
$db
ne
"
HUGOID
")
&&
(
$db
ne
"
HUGOALIAS
")
&&
(
$db
ne
"
HUGOWITHDRAWN
"))
{
#if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "SCOP2") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
if
((
$db
eq
"
EMBL
")
||
(
$db
eq
"
EC
")
||
(
$db
eq
"
OMIM
")
||
(
$db
eq
"
REFSEQ
")
||
(
$db
eq
"
LOCUS
"))
{
##############Temporary changes###########################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
##########################################################
my
$dbentry
=
Bio::EnsEMBL::
DBEntry
->
new
(
-
adaptor
=>
$adaptor
,
-
primary_id
=>
$primary_ac
,
...
...
@@ -86,27 +114,48 @@ while (<MAPPING>) {
$adaptor
->
store
(
$dbentry
,
$ens
,"
Gene
");
}
if
(
$db
eq
"
HUGOSYMBOL
")
{
#print STDERR "HERE\n";
if
((
$db
eq
"
SP
")
||
(
$db
eq
"
SPTREMBL
"))
{
if
(
!
defined
$spid
{
$primary_ac
})
{
#print "SP primary Ac ($primary_ac) does not have an id\n";
}
my
$dbentry
=
Bio::EnsEMBL::
DBEntry
->
new
(
-
adaptor
=>
$adaptor
,
-
primary_id
=>
$primary_ac
,
-
display_id
=>
$spid
{
$primary_ac
},
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
}
if
(
$db
eq
"
HUGOID
")
{
##################Temporary changes#######################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
##########################################################
if
(
!
defined
$hugoid
{
$primary_ac
})
{
print
"
Hugo primary Ac (
$primary_ac
) does not have an id
\n
";
}
my
$dbentry
=
Bio::EnsEMBL::
DBEntry
->
new
(
-
adaptor
=>
$adaptor
,
-
primary_id
=>
$primary_ac
,
-
display_id
=>
$primary_ac
,
-
display_id
=>
$hugoid
{
$primary_ac
}
,
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
if
(
$hugosyn
{
$primary_ac
})
{
my
@synonyms
=
@
{
$hugosyn
{
$primary_ac
}};
#print STDERR "SYN: @synonyms\n";
foreach
my
$syn
(
@synonyms
)
{
if
(
$syn
=~
/\S+/
)
{
#print STDERR "$syn\n";
$dbentry
->
add_synonym
(
$syn
);
}
}
}
...
...
@@ -115,13 +164,21 @@ while (<MAPPING>) {
$adaptor
->
store
(
$dbentry
,
$ens
,"
Gene
");
}
if
(
$db
eq
"
SCOP
")
{
if
(
$db
eq
"
SCOP
")
{
#############tmp########################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
########################################
if
(
!
defined
$scopid
{
$primary_ac
})
{
print
"
SCOP primary Ac (
$primary_ac
) does not have an id
\n
";
}
my
$dbentry
=
Bio::EnsEMBL::
DBEntry
->
new
(
-
adaptor
=>
$adaptor
,
-
primary_id
=>
$primary_ac
,
-
display_id
=>
$primary_ac
,
-
display_id
=>
$scopid
{
$primary_ac
}
,
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
...
...
@@ -138,3 +195,6 @@ while (<MAPPING>) {
}
}
misc-scripts/protein_match/process_pmach.pl
View file @
0d19e0bd
...
...
@@ -32,12 +32,12 @@ my ($ens,$sp,$refseq,$pdb);
);
&runpmatch
();
&postprocesspmatch
(
$sp
);
#
&postprocesspmatch($sp);
&postprocesspmatch
(
$refseq
);
&postprocesspmatch
(
$pdb
);
&finalprocess
(
$sp
);
#
&postprocesspmatch($pdb);
#
&finalprocess($sp);
&finalprocess
(
$refseq
);
&finalprocess
(
$pdb
);
#
&finalprocess($pdb);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/TGWpep -sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
...
...
@@ -45,14 +45,14 @@ sub runpmatch {
print
STDERR
"
Running pmatch
\n
";
#Run pmatch and store the data in files which will be kept for debugging
my
$pmatch1
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$sp
$ens
> ens_sp_rawpmatch
";
#
my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my
$pmatch2
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$refseq
$ens
> ens_refseq_rawpmatch
";
my
$pmatch3
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$pdb
$ens
> ens_pdb_rawpmatch
";
#
my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
system
(
$pmatch1
);
# == 0 or die "$0\Error running '$pmatch1' : $!";
#
system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system
(
$pmatch2
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
system
(
$pmatch3
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
#
system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment