Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
f4359138
Commit
f4359138
authored
Apr 23, 2001
by
Emmanuel Mongin
Browse files
Set of script with the changes I've used for the mapping
parent
054348b2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
89 additions
and
51 deletions
+89
-51
misc-scripts/protein_match/get_embl_mim_mapping.pl
misc-scripts/protein_match/get_embl_mim_mapping.pl
+5
-1
misc-scripts/protein_match/get_xrefs.pl
misc-scripts/protein_match/get_xrefs.pl
+19
-9
misc-scripts/protein_match/load_mapping.pl
misc-scripts/protein_match/load_mapping.pl
+56
-32
misc-scripts/protein_match/process_pmach.pl
misc-scripts/protein_match/process_pmach.pl
+9
-9
No files found.
misc-scripts/protein_match/get_embl_mim_mapping.pl
View file @
f4359138
...
...
@@ -45,7 +45,9 @@ while ( my $seq1 = $in1->next_seq() ) {
}
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
EMBL_AC
\t
"
.
$link
->
primary_id
,"
\n
";
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
EMBL_PROT_AC
\t
"
.
$link
->
optional_id
,"
\n
";
my
(
$protac
)
=
$link
->
optional_id
=~
/^(\w+).\S+/
;
print
OUT
"
$map
{
$ac
}
\t
$ac
\t
EMBL_PROT_AC
\t
$protac
\n
";
}
if
(
$link
->
database
eq
"
MIM
")
{
if
(
!
defined
$map
{
$ac
})
{
...
...
@@ -57,6 +59,8 @@ while ( my $seq1 = $in1->next_seq() ) {
}
}
...
...
misc-scripts/protein_match/get_xrefs.pl
View file @
f4359138
...
...
@@ -39,7 +39,7 @@ my %embl_clone;
'
output:s
'
=>\
$out
);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_xrefs.pl -mapping ../map_outputs/
total
map.
fin
al -xrefs ../sec_outputs/xref
s.map
-dbmap ../sec_outputs/mapdb.map -refseq ../primary/hs.gnp -output final.map
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_xrefs.pl -mapping ../map_outputs/map.
tot
al -xrefs ../sec_outputs/xref
.total
-dbmap ../sec_outputs/mapdb.map -refseq ../primary/hs.gnp -output final.map
open
(
DBMAP
,"
$dbmap
")
||
die
"
Can't open file
$dbmap
\n
";
open
(
XREF
,"
$xrefs
")
||
die
"
Can't open file
$xrefs
\n
";
...
...
@@ -52,12 +52,12 @@ open (OUT,">$out") || die "Can't open file $out\n";
#open (CLONE,"clones.txt") || die "Can't open file\n";
#Put in a hash all of the embl clones used by Ensembl
while
(
<
CLONE
>
)
{
chomp
;
my
(
$embl_ac
,
$id
)
=
split
(
/\t/
,
$_
);
print
"
$embl_ac
\n
";
$embl_clone
{
$embl_ac
}
=
1
;
}
#
while (<CLONE>) {
#
chomp;
#
my ($embl_ac,$id) = split(/\t/,$_);
#
print "$embl_ac\n";
#
$embl_clone{$embl_ac}=1;
#
}
while
(
<
DBMAP
>
)
{
...
...
@@ -105,7 +105,7 @@ while (<XREF>) {
#Get the embl ACs for each SP and SPTREMBL proteins
#if ((($xrdb eq "SP") || ($xrdb eq "SPTREMBL")) && ($db eq "EMBL")) {
#print "$id\n";
#if ($embl_clone{$id}) {
#if ($embl_clone{$id}) {
# push(@{$sp2embl{$xrac}},$id);
#}
...
...
@@ -117,7 +117,9 @@ while (<MAP>) {
#P01111 COBP00000000001 100 PRIMARY
my
(
$xr
,
$ens
,
$perc
,
$tag
)
=
split
(
/\t/
,
$_
);
if
(
$xr
=~
/^\w+-\d{2}/
)
{
(
$xr
)
=
$xr
=~
/^(\w+)-\d{2}/
;
}
#Hack to be taken away
#my ($en1,$en2) = $ens =~ /(\w{3})P(\d+)/;
#my $enst = $en1."T".$en2;
...
...
@@ -161,10 +163,18 @@ while (<MAP>) {
#Print the know gene AC and its database
#print OUT "$ens\t$map{$xr}\t$xr\n";
#}
if
(
!
defined
$map
{
$xr
})
{
print
STDERR
"
can't find primary
$xr
\n
";
}
print
OUT
"
$ens
\t
$map
{
$xr
}
\t
$xr
\n
";
#Print all of the external database it links to (eg: HUGO)
foreach
my
$both
(
@
{
$hash
{
$xr
}}){
if
(
!
defined
$hash
{
$xr
})
{
print
STDERR
"
can't find Xref
$xr
\n
";
}
(
$a
,
$b
)
=
split
(
/&/
,
$both
);
print
OUT
"
$ens
\t
$a
\t
$b
\n
";
}
...
...
misc-scripts/protein_match/load_mapping.pl
View file @
f4359138
...
...
@@ -12,18 +12,18 @@ my %scopid;
my
%gene_map
;
my
%transcript_map
;
my
%spid
;
my
(
$mapping
,
$hugosyn
,
$scopsyn
,
$out
,
$spsyn
);
my
(
$mapping
,
$hugosyn
s
,
$scopsyn
,
$out
,
$spsyn
);
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/
ens4
.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
#perl ../../src/ensembl-live/misc-scripts/protein_match/load_mapping.pl -mapping outputs/final_sorted.map -hugosyn secondary/
nomeid
.txt -scopsyn secondary/dir.dom.scop.txt_1.53 -spid primary/hum_sp_sptrembl.pep
&GetOptions
(
'
mapping:s
'
=>\
$mapping
,
'
hugosyn:s
'
=>\
$hugosyn
,
'
hugosyn:s
'
=>\
$hugosyn
s
,
'
scopsyn:s
'
=>\
$scopsyn
,
'
spid:s
'
=>\
$spsyn
);
my
$dsn
=
"
DBI:mysql:database=
ensembl09
0_tmp;host=ecs1c
";
my
$dsn
=
"
DBI:mysql:database=
xrefs10
0_tmp;host=ecs1c
";
my
$db
=
DBI
->
connect
("
$dsn
",'
ensadmin
')
||
die
("
Could not connect to db!
");
my
$adaptor
=
Bio::EnsEMBL::DBSQL::
DBEntryAdaptor
->
new
(
$db
);
...
...
@@ -32,35 +32,42 @@ my $adaptor = Bio::EnsEMBL::DBSQL::DBEntryAdaptor->new($db);
print
STDERR
"
Getting SP mapping
\n
";
#
my $in = Bio::SeqIO->new(-file => $spsyn, '-format' =>'swiss');
my
$in
=
Bio::
SeqIO
->
new
(
-
file
=>
$spsyn
,
'
-format
'
=>
'
swiss
');
#
while ( my $seq = $in->next_seq() ) {
my
$ac
;
#
= $seq->accession;
my
$id
;
#
= $seq->id;
while
(
my
$seq
=
$in
->
next_seq
()
)
{
my
$ac
=
$seq
->
accession
;
my
$id
=
$seq
->
id
;
$spid
{
$ac
}
=
$id
;
#
}
}
open
(
HUGO
,
"
$hugosyn
")
||
die
"
Can't open file
$mapping
\n
";
open
(
HUGO
,
"
$hugosyn
s
")
||
die
"
Can't open file
$mapping
\n
";
while
(
<
HUGO
>
)
{
chomp
;
#get red of the cariage return present in Hugos
$_
=~
s/\r//g
;
my
(
$hgnc
,
$symbol
,
$alias
,
$withdrawn
)
=
split
(
/\t/
,
$_
);
#
my ($hgnc, $symbol, $alias, $withdrawn) = split (/\t/,$_);
my
@hug
=
split
(
/\t/
,
$_
);
my
$hgnc
=
$hug
[
0
];
my
$symbol
=
$hug
[
1
];
my
$alias
=
$hug
[
8
];
my
@aliases
=
split
(
/, /
,
$alias
);
my
@withdrawns
=
split
(
/, /
,
$withdrawn
);
#
my @withdrawns = split (/, /,$withdrawn);
$hugoid
{
$hgnc
}
=
$symbol
;
foreach
my
$al
(
@aliases
)
{
push
(
@
{
$hugosyn
{
$symbol
}},
$al
);
push
(
@
{
$hugosyn
{
$hgnc
}},
$al
);
}
foreach
my
$wi
(
@withdrawns
)
{
push
(
@
{
$hugosyn
{
$symbol
}},
$wi
);
}
#
foreach my $wi(@withdrawns) {
#
push(@{$hugosyn{$symbol}},$wi);
#
}
}
close
(
HUGO
);
...
...
@@ -76,7 +83,9 @@ while (<SCOP>) {
#Set up the display id
my
$display
=
$pdb
.
"
"
.
$chain
;
push
(
@
{
$scopid
{
$scopac
}},
$display
);
#push (@{$scopid{$scopac}},$display);
$scopid
{
$scopac
}
=
$display
;
#push(@{$scopsyn{$scopac}},$pdb);
#push(@{$scopsyn{$scopac}},$chain);
...
...
@@ -95,12 +104,12 @@ while (<MAPPING>) {
#Get SP mapping
#if (($db ne "HUGOSYMBOL") && ($db ne "SCOP") && ($db ne "SCOP1") && ($db ne "SCOP2") && ($db ne "HUGOID") && ($db ne "HUGOALIAS") && ($db ne "HUGOWITHDRAWN")) {
if
((
$db
eq
"
EMBL
")
||
(
$db
eq
"
EC
")
||
(
$db
eq
"
OMIM
")
||
(
$db
eq
"
REFSEQ
")
||
(
$db
eq
"
LOCUS
"))
{
if
((
$db
eq
"
EMBL
_AC
")
||
(
$db
eq
"
EMBL_PROT_AC
")
||
(
$db
eq
"
EC
")
||
(
$db
eq
"
OMIM
")
||
(
$db
eq
"
REFSEQ
")
||
(
$db
eq
"
LOCUS
"))
{
##############Temporary changes###########################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
#
my ($ac1) = $ens =~ /COBP(\d+)/;
#
$ens = "COBT"."$ac1";
##########################################################
...
...
@@ -111,7 +120,7 @@ while (<MAPPING>) {
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
$adaptor
->
store
(
$dbentry
,
$ens
,"
Gene
");
$adaptor
->
store
(
$dbentry
,
$ens
,"
Translation
");
}
...
...
@@ -129,15 +138,16 @@ while (<MAPPING>) {
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
$adaptor
->
store
(
$dbentry
,
$ens
,"
Translation
");
}
if
(
$db
eq
"
HUGOID
")
{
##################Temporary changes#######################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
#
my ($ac1) = $ens =~ /COBP(\d+)/;
#
$ens = "COBT"."$ac1";
##########################################################
if
(
!
defined
$hugoid
{
$primary_ac
})
{
print
"
Hugo primary Ac (
$primary_ac
) does not have an id
\n
";
}
...
...
@@ -150,8 +160,12 @@ while (<MAPPING>) {
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
if
(
$hugosyn
{
$primary_ac
})
{
my
@synonyms
=
@
{
$hugosyn
{
$primary_ac
}};
#print STDERR "SYN: @synonyms\t$primary_ac\n";
foreach
my
$syn
(
@synonyms
)
{
if
(
$syn
=~
/\S+/
)
{
...
...
@@ -161,24 +175,27 @@ while (<MAPPING>) {
}
$adaptor
->
store
(
$dbentry
,
$ens
,"
Gene
");
$adaptor
->
store
(
$dbentry
,
$ens
,"
Translation
");
}
if
(
$db
eq
"
SCOP
")
{
if
(
$db
eq
"
SCOP
")
{
my
$dspl
;
#############tmp########################
my
(
$ac1
)
=
$ens
=~
/COBP(\d+)/
;
$ens
=
"
COBT
"
.
"
$ac1
";
#
my ($ac1) = $ens =~ /COBP(\d+)/;
#
$ens = "COBT"."$ac1";
########################################
$dspl
=
$scopid
{
$primary_ac
};
if
(
!
defined
$scopid
{
$primary_ac
})
{
print
"
SCOP primary Ac (
$primary_ac
) does not have an id
\n
";
(
$dspl
)
=
$primary_ac
=~
/\w{1}(\w{4})/
;
}
my
$dbentry
=
Bio::EnsEMBL::
DBEntry
->
new
(
-
adaptor
=>
$adaptor
,
-
primary_id
=>
$primary_ac
,
-
display_id
=>
$
scopid
{
$primary_ac
}
,
-
display_id
=>
$
dspl
,
-
version
=>
1
,
-
release
=>
1
,
-
dbname
=>
$db
);
...
...
@@ -190,7 +207,7 @@ if ($db eq "SCOP") {
}
}
}
$adaptor
->
store
(
$dbentry
,
$ens
,"
Gene
");
$adaptor
->
store
(
$dbentry
,
$ens
,"
Translation
");
}
...
...
@@ -198,3 +215,10 @@ if ($db eq "SCOP") {
misc-scripts/protein_match/process_pmach.pl
View file @
f4359138
...
...
@@ -32,27 +32,27 @@ my ($ens,$sp,$refseq,$pdb);
);
&runpmatch
();
#
&postprocesspmatch($sp);
&postprocesspmatch
(
$sp
);
&postprocesspmatch
(
$refseq
);
#
&postprocesspmatch($pdb);
#
&finalprocess($sp);
&postprocesspmatch
(
$pdb
);
&finalprocess
(
$sp
);
&finalprocess
(
$refseq
);
#
&finalprocess($pdb);
&finalprocess
(
$pdb
);
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/
TGWpep
-sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop.fas
#perl ../../../src/ensembl-live/misc-scripts/protein_match/process_pmach.pl -ens ../primary/
SPAN_pepfile
-sp ../primary/SPTr.human.expanded -refseq ../primary/hs2.fsa -pdb ../primary/scop
_human
.fas
sub
runpmatch
{
print
STDERR
"
Running pmatch
\n
";
#Run pmatch and store the data in files which will be kept for debugging
#
my $pmatch1 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $sp $ens > ens_sp_rawpmatch";
my
$pmatch1
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$sp
$ens
> ens_sp_rawpmatch
";
my
$pmatch2
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$refseq
$ens
> ens_refseq_rawpmatch
";
#
my $pmatch3 = "/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14 $pdb $ens > ens_pdb_rawpmatch";
my
$pmatch3
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$pdb
$ens
> ens_pdb_rawpmatch
";
#
system($pmatch1); # == 0 or die "$0\Error running '$pmatch1' : $!";
system
(
$pmatch1
);
# == 0 or die "$0\Error running '$pmatch1' : $!";
system
(
$pmatch2
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
#
system($pmatch3); #== 0 or die "$0\Error running '$pmatch2' : $!";
system
(
$pmatch3
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment