Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
f231086f
Commit
f231086f
authored
15 years ago
by
Ian Longden
Browse files
Options
Downloads
Patches
Plain Diff
sub transcript_display_xref_sources added
parent
ca1c702c
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm
+175
-3
175 additions, 3 deletions
misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm
with
175 additions
and
3 deletions
misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm
+
175
−
3
View file @
f231086f
...
...
@@ -18,6 +18,46 @@ my %transcript_to_translation;
my
%transcript_length
;
sub
transcript_display_xref_sources
{
my
@list
=
qw(miRBase
RFAM
HGNC_curated_gene
HGNC_automatic_gene
MGI_curated_gene
MGI_automatic_gene
Clone_based_vega_gene
Clone_based_ensembl_gene
HGNC_curated_transcript
HGNC_automatic_transcript
MGI_curated_transcript
MGI_automatic_transcript
Clone_based_vega_transcript
Clone_based_ensembl_transcript
IMGT/GENE_DB
HGNC
SGD
MGI
flybase_symbol
Anopheles_symbol
Genoscope_annotated_gene
Uniprot/SWISSPROT
Uniprot/Varsplic
RefSeq_peptide
RefSeq_dna
Uniprot/SPTREMBL
EntrezGene
IPI)
;
my
%ignore
;
$ignore
{"
EntrezGene
"}
=
'
FROM:RefSeq_[pd][en][pa].*_predicted
';
return
[
\
@list
,
\
%ignore
];
}
sub
new
{
my
(
$class
,
$mapper
)
=
@_
;
...
...
@@ -911,12 +951,12 @@ DXS
# print "$gene_id, $transcript_id, $p, $xref_id, $type, $label\n";
# if(defined($label) && $label =~ /\D+/){ # not just a number
if
(
$gene_id
!=
$last_gene
){
$self
->
check
(
$gene_id
,
$label
,
$gene_sth
,
"
Gene
");
$self
->
check
_label
(
$gene_id
,
$label
,
$gene_sth
,
"
Gene
");
$last_gene
=
$gene_id
;
}
if
(
$type
ne
"
Gene
"){
if
(
!
defined
(
$seen_transcript
{
$transcript_id
})){
# not seen yet so its the best
$self
->
check
(
$transcript_id
,
$label
,
$tran_sth
,
"
Transcript
");
$self
->
check
_label
(
$transcript_id
,
$label
,
$tran_sth
,
"
Transcript
");
}
$seen_transcript
{
$transcript_id
}
=
$xref_id
;
...
...
@@ -927,7 +967,7 @@ DXS
}
sub
check
{
sub
check
_label
{
my
$self
=
shift
;
my
$id
=
shift
;
my
$label
=
shift
;
...
...
@@ -945,4 +985,136 @@ sub check{
}
sub
while_your_feet_are_stomping
{
my
$self
=
shift
;
my
$sql
=
(
<<
SQL
);
CREATE
TABLE
gene_desc_prioritys
(
source_id
INT
NOT
NULL
,
priority
INT
NOT
NULL
,
PRIMARY
KEY
(
source_id
)
)
COLLATE
=
latin1_swedish_ci
TYPE
=
InnoDB
SQL
my
$sth
=
$self
->
xref
->
dbc
->
prepare
(
$sql
);
$sth
->
execute
;
$sth
->
finish
;
my
@presedence
=
$self
->
gene_description_sources
();
my
@regexps
=
$self
->
gene_description_filter_regexps
();
my
$i
=
0
;
my
$ins_p_sth
=
$self
->
xref
->
dbc
->
prepare
("
INSERT into gene_desc_prioritys (source_id, priority) values(?, ?)
");
my
$get_source_id_sth
=
$self
->
xref
->
dbc
->
prepare
("
select source_id from source where name like ?
");
#
# So the higher the number the better then
#
foreach
my
$name
(
reverse
(
@presedence
)){
$i
++
;
$get_source_id_sth
->
execute
(
$name
);
my
$source_id
;
$get_source_id_sth
->
bind_columns
(
\
$source_id
);
while
(
$get_source_id_sth
->
fetch
){
$ins_p_sth
->
execute
(
$source_id
,
$i
);
}
}
$ins_p_sth
->
finish
;
$get_source_id_sth
->
finish
;
#######################################################################
my
$gene_desc_sql
=
(
<<
DXS
);
SELECT
gtt
.
gene_id
,
x
.
xref_id
FROM
source
s, xref x, object_xref ox,
identity_xref
ix
,
gene_transcript_translation
gtt
,
gene_desc_prioritys
p
WHERE
x
.
source_id
=
s.source_id
AND s.source_id = p.sourc
e_id
AND
x
.
xref_id
=
ox
.
xref_id
AND
ox
.
ox_status
=
"
DUMP_OUT
"
AND
(
(
ox
.
ensembl_object_type
=
"
Transcript
"
and
gtt
.
transcript_id
=
ox
.
ensembl_id
)
OR
(
ox
.
ensembl_object_type
=
"
Translation
"
and
gtt
.
translation_id
=
ox
.
ensembl_id
)
OR
(
ox
.
ensembl_object_type
=
"
Gene
"
and
gtt
.
gene_id
=
ox
.
ensembl_id
)
)
AND
ox
.
object_xref_id
=
ix
.
object_xref_id
ORDER
BY
gtt
.
gene_id
DESC
,
p
.
priority
DESC
,
(
ix
.
target_identity
+
ix
.
query_identity
)
DESC
DXS
########################################################################
# my $get_desc_sql = "select description from xref where xref_id = ?";
my
$gene_sth
=
$self
->
core
->
dbc
->
prepare
("
select g.description from gene g where g.gene_id = ?
");
my
$last_gene
=
0
;
my
$gene_desc_sth
=
$self
->
xref
->
dbc
->
prepare
(
$gene_desc_sql
);
$gene_desc_sth
->
execute
();
my
(
$gene_id
,
$transcript_id
,
$p
,
$desc
);
# remove labvel after testig it is not needed
$gene_desc_sth
->
bind_columns
(
\
$gene_id
,
\
$transcript_id
,
\
$p
,
\
$desc
);
my
$gene_count
=
0
;
while
(
$gene_desc_sth
->
fetch
()){
# print "$gene_id, $transcript_id, $p, $xref_id, $type, $label\n";
if
(
$gene_id
!=
$last_gene
){
my
$filtered_description
=
$self
->
filter_by_regexp
(
$desc
,
\
@regexps
);
if
(
$filtered_description
ne
"")
{
$self
->
check_desc
(
$gene_id
,
$desc
,
$gene_sth
,
"
Gene
");
$gene_count
++
;
$last_gene
=
$gene_id
;
}
}
}
$gene_desc_sth
->
finish
;
print
"
$gene_count
gene descriptions added
\n
";
}
sub
filter_by_regexp
{
my
(
$self
,
$str
,
$regexps
)
=
@_
;
foreach
my
$regexp
(
@$regexps
)
{
$str
=~
s/$regexp//ig
;
}
return
$str
;
}
sub
check_desc
{
my
$self
=
shift
;
my
$id
=
shift
;
my
$desc
=
shift
;
my
$sth
=
shift
;
my
$type
=
shift
;
$sth
->
execute
(
$id
);
my
$old_desc
;
$sth
->
bind_columns
(
\
$old_desc
);
$sth
->
fetch
;
if
(
$old_desc
ne
$desc
){
print
"
ERROR:
$type
(
$id
) has different descriptions ??? old:
$old_desc
new:
$desc
\n
";
}
}
sub
gene_description_filter_regexps
{
return
();
}
1
;
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment