Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ensembl-gh-mirror
ensembl
Commits
123a2a50
Commit
123a2a50
authored
Apr 07, 2001
by
Emmanuel Mongin
Browse files
Added a small script needed in the mapping process
parent
3656484b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
7 deletions
+138
-7
misc-scripts/protein_match/get_dbmap.pl
misc-scripts/protein_match/get_dbmap.pl
+111
-0
misc-scripts/protein_match/get_embl_mim_mapping.pl
misc-scripts/protein_match/get_embl_mim_mapping.pl
+1
-1
misc-scripts/protein_match/process_pmach.pl
misc-scripts/protein_match/process_pmach.pl
+26
-6
No files found.
misc-scripts/protein_match/get_dbmap.pl
0 → 100644
View file @
123a2a50
use
strict
;
=head1 get_all_external
=head2 Description
This script will set up a mapping of each primary database (SP, SPTREMBL, Refseq and PDB) to their respective name. This mapping will be then used to give a database name to each ACs.
=head2 Options
-sp
-sptrembl
-refseq
-pdb
=head2 Contact
mongin@ebi.ac.uk
birney@ebi.ac.uk
=cut
#perl ../../../src/ensembl-live/misc-scripts/protein_match/get_dbmap.pl -sp ../primary/01_04_hum_sprot.txt -sptrembl ../primary/07_04_hum_sptrembl.txt -refseq ../primary/hs.gnp -scop ../primary/scop.fas
use
Getopt::
Long
;
use
Bio::
SeqIO
;
my
(
$sp
,
$sptrembl
,
$refseq
,
$scop
,
$out
);
&GetOptions
(
'
sp:s
'
=>\
$sp
,
'
sptrembl:s
'
=>\
$sptrembl
,
'
refseq:s
'
=>\
$refseq
,
'
scop:s
'
=>\
$scop
,
'
out:s
'
=>\
$out
);
open
(
OUT
,"
>mapdb.map
");
print
STDERR
"
Getting SP mapping
\n
";
my
$in
=
Bio::
SeqIO
->
new
(
-
file
=>
$sp
,
'
-format
'
=>
'
swiss
');
while
(
my
$seq
=
$in
->
next_seq
()
)
{
print
OUT
$seq
->
accession
.
"
\t
SP
\n
";
my
@secs
=
$seq
->
get_secondary_accessions
;
if
(
@secs
)
{
foreach
my
$sec
(
@secs
)
{
print
OUT
"
$sec
\t
SP
\n
";
}
}
}
print
STDERR
"
Getting SPTREMBL mapping
\n
";
my
$in1
=
Bio::
SeqIO
->
new
(
-
file
=>
$sptrembl
,
'
-format
'
=>
'
swiss
');
while
(
my
$seq1
=
$in1
->
next_seq
()
)
{
print
OUT
$seq1
->
accession
.
"
\t
SPTREMBL
\n
";
my
@secs
=
$seq1
->
get_secondary_accessions
;
if
(
@secs
)
{
foreach
my
$sec
(
@secs
)
{
print
OUT
"
$sec
\t
SPTREMBL
\n
";
}
}
}
print
"
Getting refseq mapping
\n
";
open
(
REFSEQ
,"
$refseq
")
||
die
"
Can't open file
\n
";
$/
=
"
\
/
\
/
\n
";
while
(
<
REFSEQ
>
)
{
my
(
$prot_ac
)
=
$_
=~
/ACCESSION\s+(\S+)/
;
my
(
$dna_ac
)
=
$_
=~
/DBSOURCE REFSEQ: accession\s+(\w+)/
;
if
(
$dna_ac
)
{
print
OUT
"
$dna_ac
\t
REFSEQ
\n
";
}
}
close
(
REFSEQ
);
$/
=
"
\n
";
print
"
Getting Scop mapping
\n
";
open
(
SCOP
,"
$scop
")
||
die
"
Can't open file
\n
";
while
(
<
SCOP
>
)
{
my
(
$scop_ac
)
=
$_
=~
/^>(\S+)/
;
if
(
$scop_ac
)
{
print
OUT
"
$scop_ac
\t
SCOP
\n
";
}
}
close
(
SCOP
);
close
(
OUT
);
misc-scripts/protein_match/get_embl_mim_mapping.pl
View file @
123a2a50
...
...
@@ -37,7 +37,7 @@ while ( my $seq1 = $in1->next_seq() ) {
foreach
my
$link
(
@dblink
)
{
if
((
$link
->
database
eq
"
EMBL
")
||
(
$link
->
database
eq
"
MIM
")
||
(
$link
->
database
eq
"
PDB
")
)
{
if
((
$link
->
database
eq
"
EMBL
")
||
(
$link
->
database
eq
"
MIM
"))
{
if
(
!
defined
$map
{
$ac
})
{
die
"
Can't map
$ac
\n
";
...
...
misc-scripts/protein_match/process_pmach.pl
View file @
123a2a50
...
...
@@ -21,20 +21,23 @@ birney@ebi.ac.uk
use
Getopt::
Long
;
my
(
$ens
,
$sp
,
$refseq
);
my
(
$ens
,
$sp
,
$refseq
,
$pdb
);
&GetOptions
(
'
ens:s
'
=>\
$ens
,
'
sp:s
'
=>\
$sp
,
'
refseq:s
'
=>\
$refseq
'
refseq:s
'
=>\
$refseq
,
'
pdb:s
'
=>\
$pdb
);
&runpmatch
();
&postprocesspmatch
(
$sp
);
&postprocesspmatch
(
$refseq
);
&postprocesspmatch
(
$pdb
);
&finalprocess
(
$sp
);
&finalprocess
(
$refseq
);
&finalprocess
(
$pdb
);
sub
runpmatch
{
print
STDERR
"
Running pmatch
\n
";
...
...
@@ -42,9 +45,13 @@ sub runpmatch {
#Run pmatch and store the data in files which will be kept for debugging
my
$pmatch1
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$sp
$ens
> ens_sp_rawpmatch
";
my
$pmatch2
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$refseq
$ens
> ens_refseq_rawpmatch
";
my
$pmatch3
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch -T 14
$pdb
$ens
> ens_pdb_rawpmatch
";
system
(
$pmatch1
);
# == 0 or die "$0\Error running '$pmatch1' : $!";
system
(
$pmatch2
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
system
(
$pmatch3
);
#== 0 or die "$0\Error running '$pmatch2' : $!";
}
...
...
@@ -61,11 +68,17 @@ sub postprocesspmatch {
open
(
PROC
,
"
ens_sp_rawpmatch
")
||
die
"
Can't open File
\n
";
}
els
e
{
els
if
(
$db
eq
$refseq
)
{
print
STDERR
"
Postprocessing pmatch for REFSEQ mapping
\n
";
open
(
OUT
,
"
>ens_refseq.processed
")
||
die
"
Can't open File
\n
";;
open
(
PROC
,
"
ens_refseq_rawpmatch
")
||
die
"
Can't open file ens_refseq_rawpmatch
\n
";
}
elsif
(
$db
eq
$pdb
)
{
print
STDERR
"
Postprocessing pmatch for PDB mapping
\n
";
open
(
OUT
,
"
>ens_pdb.processed
")
||
die
"
Can't open File
\n
";;
open
(
PROC
,
"
ens_pdb_rawpmatch
")
||
die
"
Can't open file ens_refseq_rawpmatch
\n
";
}
while
(
<
PROC
>
)
{
#538 COBP00000033978 1 538 35.3 Q14146 1 538 35.3
...
...
@@ -109,12 +122,19 @@ sub finalprocess {
open
(
OUT
,
"
>ens_sp.final
");
}
els
e
{
els
if
(
$db
eq
$refseq
)
{
print
STDERR
"
Getting final mapping for REFSEQ mapping
\n
";
open
(
PROC
,
"
ens_refseq.processed
")
||
die
"
Can' open file ens_refseq.processed
\n
";
open
(
OUT
,
"
>refseq.final
");
open
(
OUT
,
"
>
ens_
refseq.final
");
}
elsif
(
$db
eq
$pdb
)
{
print
STDERR
"
Getting final mapping for PDB mapping
\n
";
open
(
PROC
,
"
ens_refseq.processed
")
||
die
"
Can' open file ens_refseq.processed
\n
";
open
(
OUT
,
"
>ens_pdb.final
");
}
my
%hash2
;
while
(
<
PROC
>
)
{
my
(
$ens
,
$known
,
$perc
)
=
split
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment