Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
2bf1df17
Commit
2bf1df17
authored
20 years ago
by
Ian Longden
Browse files
Options
Downloads
Patches
Plain Diff
xref sequence dumps are now configurable. See get_set_lists method
parent
6ca670c0
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+201
-58
201 additions, 58 deletions
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
with
201 additions
and
58 deletions
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+
201
−
58
View file @
2bf1df17
...
@@ -33,36 +33,51 @@ Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
...
@@ -33,36 +33,51 @@ Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
sub
dump_seqs
{
sub
dump_seqs
{
my
(
$self
,
$xref
)
=
@_
;
my
(
$self
,
$xref
)
=
@_
;
$self
->
dump_ensembl
();
$self
->
dump_xref
(
$xref
);
$self
->
dump_xref
(
$xref
);
$self
->
dump_ensembl
();
}
}
sub
run_matching
{
sub
run_matching
{
my
(
$self
,
$xref
)
=
@_
;
my
@list
=
();
my
$i
=
0
;
foreach
my
$method
(
@
{
$self
->
method
()}){
my
@dna
=
();
push
@dna
,
$method
;
push
@dna
,
$xref
->
dir
.
"
/xref_
"
.
$i
.
"
_dna.fasta
";
push
@dna
,
$self
->
ensembl_dna_file
();
push
@list
,
\
@dna
;
my
@pep
=
();
push
@pep
,
$method
;
push
@pep
,
$xref
->
dir
.
"
/xref_
"
.
$i
.
"
_prot.fasta
";
push
@pep
,
$self
->
ensembl_protein_file
();
push
@list
,
\
@pep
;
$i
++
;
}
glenn_call
(
\
@list
);
print
"
NOT done yet:-)
\n
";
print
"
NOT done yet:-)
\n
";
}
}
sub
glenn_call
{
my
(
$lists
)
=
@_
;
foreach
my
$list
(
@$lists
){
my
(
$meth
,
$q
,
$t
)
=
@$list
;
print
"
HELLO:
"
.
$meth
.
"
\n\t
"
.
$q
.
"
\n\t
"
.
$t
.
"
\n\n
";
}
}
sub
store
{
sub
store
{
print
"
NOT done yet Either :-)
\n
";
print
"
NOT done yet Either :-)
\n
";
}
}
sub
dump_xref
{
sub
get_species_id_from_species_name
{
my
(
$
self
,
$xref
)
=
@_
;
my
(
$
xref
,
$species
)
=
@_
;
if
(
!
defined
(
$xref
->
species
())){
my
$sql
=
"
select species_id from species where name = '
"
.
$species
.
"
'
";
$xref
->
species
(
$self
->
species
);
}
if
(
!
defined
(
$xref
->
dir
())){
if
(
defined
(
$self
->
species
)){
$xref
->
species
(
$self
->
species
)
}
else
{
$xref
->
species
("
.
");
}
}
#
# the species specified must be in the database and hence have a species_id
#
my
$sql
=
"
select species_id from species where name = '
"
.
$xref
->
species
.
"
'
";
my
$dbi
=
$xref
->
dbi
();
my
$dbi
=
$xref
->
dbi
();
my
$sth
=
$dbi
->
prepare
(
$sql
);
my
$sth
=
$dbi
->
prepare
(
$sql
);
$sth
->
execute
();
$sth
->
execute
();
...
@@ -71,68 +86,188 @@ sub dump_xref{
...
@@ -71,68 +86,188 @@ sub dump_xref{
if
(
defined
@row
)
{
if
(
defined
@row
)
{
$species_id
=
$row
[
0
];
$species_id
=
$row
[
0
];
}
else
{
}
else
{
print
STDERR
"
Couldn't get ID for species
"
.
$
xref
->
species
.
"
\n
";
print
STDERR
"
Couldn't get ID for species
"
.
$species
.
"
\n
";
print
STDERR
"
It must be one of :-
\n
";
print
STDERR
"
It must be one of :-
\n
";
$sql
=
"
select name from species
";
$sql
=
"
select name from species
";
$sth
=
dbi
()
->
prepare
(
$sql
);
$sth
=
$
dbi
->
prepare
(
$sql
);
$sth
->
execute
();
$sth
->
execute
();
while
(
my
@row
=
$sth
->
fetchrow_array
()){
while
(
my
@row
=
$sth
->
fetchrow_array
()){
print
STDERR
$row
[
0
]
.
"
\n
";
print
STDERR
$row
[
0
]
.
"
\n
";
}
}
die
("
Please try again
\n
");
die
("
Please try again
:-)
\n
");
}
}
return
$species_id
;
}
#
# Dump out the sequences where the species has an id of species_id
sub
get_set_lists
{
# and the sequence type is 'dna'
my
(
$self
)
=
@_
;
#
$self
->
xref_dna_file
(
$xref
->
dir
.
"
/
"
.
$xref
->
species
.
"
_xref_dna.fasta
");
return
[["
method1
",["
homo_sapiens
","
RefSeq
"],["
homo_sapiens
","
UniProtSwissProt
"]],
open
(
XDNA
,"
>
"
.
$xref
->
dir
.
"
/
"
.
$xref
->
species
.
"
_xref_dna.fasta
")
||
die
"
Could not open xref_dna.fasta
";
["
method2
",[
$self
->
species
,"
*
"]],
my
$sql
=
"
select p.xref_id, p.sequence from primary_xref p, xref x
";
["
method3
",["
*
","
*
"]]];
$sql
.=
"
where p.xref_id = x.xref_id and
";
}
$sql
.=
"
p.sequence_type ='dna' and
";
$sql
.=
"
x.species_id =
"
.
$species_id
.
"
";
$sth
=
$xref
->
dbi
()
->
prepare
(
$sql
);
sub
get_source_id_from_source_name
{
my
(
$xref
,
$source
)
=
@_
;
my
$source_id
;
my
$sql
=
"
select source_id from source where name = '
"
.
$source
.
"
'
";
my
$dbi
=
$xref
->
dbi
();
my
$sth
=
$dbi
->
prepare
(
$sql
);
$sth
->
execute
();
my
@row
=
$sth
->
fetchrow_array
();
if
(
defined
$row
[
0
]
and
$row
[
0
]
ne
'')
{
$source_id
=
$row
[
0
];
# print $source."\t*".$row[0]."*\n";
}
else
{
print
STDERR
"
Couldn't get ID for source
"
.
$source
.
"
\n
";
print
STDERR
"
It must be one of :-
\n
";
$sql
=
"
select name from source
";
$sth
=
$dbi
->
prepare
(
$sql
);
$sth
->
execute
();
while
(
my
@row
=
$sth
->
fetchrow_array
()){
print
STDERR
$row
[
0
]
.
"
\n
";
}
die
("
Please try again :-)
\n
");
}
return
$source_id
;
}
sub
dump_xref
{
my
(
$self
,
$xref
)
=
@_
;
if
(
!
defined
(
$xref
->
dir
())){
if
(
defined
(
$self
->
dir
)){
$xref
->
species
(
$self
->
dir
);
}
else
{
$xref
->
dir
("
.
");
}
}
my
@method
=
();
my
@lists
=
@
{
$self
->
get_set_lists
()};
my
$i
=
0
;
foreach
my
$list
(
@lists
){
print
"
method->
"
.
@$list
[
0
]
.
"
\n
";
$method
[
$i
]
=
shift
@$list
;
my
$j
=
1
;
my
@source_id
-
();
my
@species_id
=
();
foreach
my
$element
(
@$list
){
while
(
my
$species
=
shift
(
@$element
)){
# print $j.")\t".$species."\n";
if
(
$species
ne
"
*
"){
$species_id
[
$j
]
=
get_species_id_from_species_name
(
$xref
,
$species
);
}
else
{
$species_id
[
$j
]
=
-
1
;
}
my
$source
=
shift
(
@$element
);
if
(
$source
ne
"
*
"){
$source_id
[
$j
]
=
get_source_id_from_source_name
(
$xref
,
$source
);
}
else
{
$source_id
[
$j
]
=
-
1
;
}
print
$j
.
"
\t
"
.
$source
.
"
\t
"
.
$source_id
[
$j
]
.
"
\n
";
print
$j
.
"
\t
"
.
$species
.
"
\t
"
.
$species_id
[
$j
]
.
"
\n
";
$j
++
;
}
}
#method data fully defined now
dump_subset
(
$xref
,
\
@species_id
,
\
@source_id
,
$i
);
$i
++
;
}
$self
->
method
(
\
@method
);
return
;
}
sub
dump_subset
{
my
(
$xref
,
$rspecies_id
,
$rsource_id
,
$index
)
=
@_
;
open
(
XDNA
,"
>
"
.
$xref
->
dir
()
.
"
/xref_
"
.
$index
.
"
_dna.fasta
")
||
die
"
Could not open xref_
"
.
$index
.
"
_dna.fasta
";
my
$sql
=
"
select p.xref_id, p.sequence, x.species_id , x.source_id
";
$sql
.=
"
from primary_xref p, xref x
";
$sql
.=
"
where p.xref_id = x.xref_id and
";
$sql
.=
"
p.sequence_type ='dna'
";
for
(
my
$j
=
1
;
$j
<
scalar
(
@$rspecies_id
);
$j
++
){
print
$j
.
"
\t
"
.
$$rspecies_id
[
$j
]
.
"
\t
"
.
$$rsource_id
[
$j
]
.
"
\n
";
}
# return $xref->dir."/xref_".$i."_dna.fasta";
my
$sth
=
$xref
->
dbi
()
->
prepare
(
$sql
);
$sth
->
execute
();
$sth
->
execute
();
my
$i
=
0
;
my
$i
=
0
;
while
(
my
@row
=
$sth
->
fetchrow_array
()){
while
(
my
@row
=
$sth
->
fetchrow_array
()){
$i
++
;
my
$pass
=
0
;
$row
[
1
]
=~
s/(.{60})/$1\n/g
;
for
(
my
$j
=
1
;
$j
<
scalar
(
@$rspecies_id
);
$j
++
){
print
XDNA
"
>
"
.
$row
[
0
]
.
"
\n
"
.
$row
[
1
]
.
"
\n
";
if
(
$$rspecies_id
[
$j
]
<
0
or
$row
[
2
]
==
$$rspecies_id
[
$j
]){
if
(
$i
>
10
){
if
(
$$rsource_id
[
$j
]
<
0
or
$row
[
3
]
==
$$rsource_id
[
$j
]){
goto
ENDDNA
;
$pass
=
1
;
}
}
}
if
(
$pass
){
$i
++
;
$row
[
1
]
=~
s/(.{60})/$1\n/g
;
print
XDNA
"
>
"
.
$row
[
0
]
.
"
\n
"
.
$row
[
1
]
.
"
\n
";
if
(
$i
>
10
){
goto
ENDDNA
;
}
}
}
}
}
ENDDNA:
ENDDNA:
close
XDNA
;
close
XDNA
;
#
# Dump out the sequences where the species has an id of species_id
open
(
XPRO
,"
>
"
.
$xref
->
dir
.
"
/xref_
"
.
$index
.
"
_prot.fasta
")
# and the sequence type is 'peptide'
||
die
"
Could not open xref_
"
.
$index
.
"
_prot.fasta
";
#
my
$sql
=
"
select p.xref_id, p.sequence, x.species_id , x.source_id
";
$self
->
xref_protein_file
(
$xref
->
dir
.
"
/
"
.
$self
->
species
.
"
_xref_prot.fasta
");
$sql
.=
"
from primary_xref p, xref x
";
open
(
XPEP
,"
>
"
.
$xref
->
dir
.
"
/
"
.
$self
->
species
.
"
_xref_prot.fasta
")
||
die
"
Could not open xref_prot.fasta
";
$sql
.=
"
where p.xref_id = x.xref_id and
";
$sql
=
"
select p.xref_id, p.sequence from primary_xref p, xref x
";
$sql
.=
"
p.sequence_type ='peptide'
";
$sql
.=
"
where p.xref_id = x.xref_id and
";
$sql
.=
"
p.sequence_type ='peptide' and
";
$sql
.=
"
x.species_id =
"
.
$species_id
.
"
";
$sth
=
$xref
->
dbi
()
->
prepare
(
$sql
);
$sth
=
$xref
->
dbi
()
->
prepare
(
$sql
);
$sth
->
execute
();
$sth
->
execute
();
$i
=
0
;
$i
=
0
;
while
(
my
@row
=
$sth
->
fetchrow_array
()){
while
(
my
@row
=
$sth
->
fetchrow_array
()){
$i
++
;
my
$pass
=
0
;
$row
[
1
]
=~
s/(.{60})/$1\n/g
;
for
(
my
$j
=
1
;
$j
<
scalar
(
@$rspecies_id
);
$j
++
){
print
XPEP
"
>
"
.
$row
[
0
]
.
"
\n
"
.
$row
[
1
]
.
"
\n
";
if
(
$$rspecies_id
[
$j
]
<
0
or
$row
[
2
]
==
$$rspecies_id
[
$j
]){
if
(
$i
>
10
){
if
(
$$rsource_id
[
$j
]
<
0
or
$row
[
3
]
==
$$rsource_id
[
$j
]){
goto
ENDXPEP
;
$pass
=
1
;
}
}
}
if
(
$pass
){
$i
++
;
$row
[
1
]
=~
s/(.{60})/$1\n/g
;
print
XPRO
"
>
"
.
$row
[
0
]
.
"
\n
"
.
$row
[
1
]
.
"
\n
";
if
(
$i
>
10
){
goto
ENDPRO
;
}
}
}
}
}
END
XPEP
:
END
PRO
:
close
XP
EP
;
close
XP
RO
;
}
}
sub
dump_ensembl
{
sub
dump_ensembl
{
my
(
$self
)
=
@_
;
my
(
$self
)
=
@_
;
...
@@ -177,14 +312,14 @@ sub fetch_and_dump_seq{
...
@@ -177,14 +312,14 @@ sub fetch_and_dump_seq{
foreach
my
$gene_id
(
@gene_ids
){
foreach
my
$gene_id
(
@gene_ids
){
$i
++
;
$i
++
;
my
$gene
=
$gene_adap
->
fetch_by_dbID
(
$gene_id
);
my
$gene
=
$gene_adap
->
fetch_by_dbID
(
$gene_id
);
print
"
gene
"
.
$gene
.
"
\n
";
#
print "gene ".$gene."\n";
foreach
my
$transcript
(
@
{
$gene
->
get_all_Transcripts
()})
{
foreach
my
$transcript
(
@
{
$gene
->
get_all_Transcripts
()})
{
my
$seq
=
$transcript
->
spliced_seq
();
my
$seq
=
$transcript
->
spliced_seq
();
$seq
=~
s/(.{60})/$1\n/g
;
$seq
=~
s/(.{60})/$1\n/g
;
print
DNA
"
>
"
.
$transcript
->
dbID
()
.
"
\n
"
.
$seq
.
"
\n
";
print
DNA
"
>
"
.
$transcript
->
dbID
()
.
"
\n
"
.
$seq
.
"
\n
";
my
$trans
=
$transcript
->
translation
();
my
$trans
=
$transcript
->
translation
();
my
$translation
=
$transcript
->
translate
();
my
$translation
=
$transcript
->
translate
();
print
"
tranlation
"
.
$translation
.
"
\n
";
#
print "tranlation ".$translation."\n";
my
$pep_seq
=
$translation
->
seq
();
my
$pep_seq
=
$translation
->
seq
();
$pep_seq
=~
s/(.{60})/$1\n/g
;
$pep_seq
=~
s/(.{60})/$1\n/g
;
print
PEP
"
>
"
.
$trans
->
dbID
()
.
"
\n
"
.
$pep_seq
.
"
\n
";
print
PEP
"
>
"
.
$trans
->
dbID
()
.
"
\n
"
.
$pep_seq
.
"
\n
";
...
@@ -236,6 +371,14 @@ sub ensembl_dna_file{
...
@@ -236,6 +371,14 @@ sub ensembl_dna_file{
return
$self
->
{
_ens_dna_file
};
return
$self
->
{
_ens_dna_file
};
}
}
sub
method
{
my
(
$self
,
$arg
)
=
@_
;
(
defined
$arg
)
&&
(
$self
->
{
_method
}
=
$arg
);
return
$self
->
{
_method
};
}
#sub get_ensembl_type{
#sub get_ensembl_type{
# my %type;
# my %type;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment