Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
823ebda6
Commit
823ebda6
authored
19 years ago
by
Glenn Proctor
Browse files
Options
Downloads
Patches
Plain Diff
GO xrefs hang off translations, not genes.
Also refactoring.
parent
47680c9e
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/xref_projection/project_display_xrefs.pl
+180
-58
180 additions, 58 deletions
misc-scripts/xref_projection/project_display_xrefs.pl
with
180 additions
and
58 deletions
misc-scripts/xref_projection/project_display_xrefs.pl
+
180
−
58
View file @
823ebda6
use
strict
;
# Sets display_xref_ids for novel genes in the "to" database based
# on their orthologs in the "from" database.
Orthology relationships
# are read from a Compara database.
# on their orthologs in the "from" database.
Can also project GO xrefs.
#
Orthology relationships
are read from a Compara database.
use
Getopt::
Long
;
use
Bio::EnsEMBL::
Registry
;
...
...
@@ -11,24 +11,36 @@ use Bio::EnsEMBL::DBSQL::GeneAdaptor;
my
$method_link_type
=
"
ENSEMBL_ORTHOLOGUES
";
my
(
$conf
,
$compara
,
$from_species
,
$
to_
species
,
$print
,
$store
);
my
(
$conf
,
$compara
,
$from_species
,
@
to_
multi
,
$print
,
$store
,
$names
,
$go_terms
);
GetOptions
('
conf=s
'
=>
\
$conf
,
'
compara=s
'
=>
\
$compara
,
'
from=s
'
=>
\
$from_species
,
'
to=s
'
=>
\
$
to_
species
,
'
to=s
'
=>
\
@
to_
multi
,
'
method=s
'
=>
\
$method_link_type
,
'
names
'
=>
\
$names
,
'
go_terms
'
=>
\
$go_terms
,
'
print
'
=>
\
$print
,
'
store
'
=>
\
$store
,
'
help
'
=>
sub
{
usage
();
exit
(
0
);
});
if
(
!
$compara
||
!
$from_species
||
!
$to_species
)
{
@to_multi
=
split
(
/,/
,
join
('
,
',
@to_multi
));
if
(
!
$conf
||
!
$compara
||
!
$from_species
||
!
@to_multi
)
{
usage
();
exit
(
1
);
}
if
(
!
$go_terms
&&
!
$names
)
{
print
"
One or both of --names or --go_terms must be specified
\n
";
print
"
Use --help for more detailed usage informaion
\n
";
exit
(
1
);
}
# Take values from ENSEMBL_REGISTRY environment variable or from ~/.ensembl_init
# if no reg_conf file is given.
Bio::EnsEMBL::
Registry
->
no_version_check
(
1
);
...
...
@@ -39,90 +51,193 @@ my $ha = Bio::EnsEMBL::Registry->get_adaptor($compara, 'compara', 'Homology')
my
$ma
=
Bio::EnsEMBL::
Registry
->
get_adaptor
(
$compara
,
'
compara
',
'
Member
');
my
$from_ga
=
Bio::EnsEMBL::
Registry
->
get_adaptor
(
$from_species
,
'
core
',
'
Gene
');
my
$to_ga
=
Bio::EnsEMBL::
Registry
->
get_adaptor
(
$to_species
,
'
core
',
'
Gene
');
my
$to_dbea
=
Bio::EnsEMBL::
Registry
->
get_adaptor
(
$to_species
,
'
core
',
'
DBEntry
');
my
$mlss
=
$mlssa
->
fetch_by_method_link_type_registry_aliases
(
$method_link_type
,
[
$from_species
,
$to_species
]);
my
$changed_names
=
0
;
my
$changed_go_xrefs
=
0
;
# Get all genes, find homologies, set xrefs
foreach
my
$to_species
(
@to_multi
)
{
my
$changed
=
0
;
my
$unchanged
=
0
;
my
$str
=
"
gene display_xrefs and descriptions
"
if
(
$names
);
$str
.=
"
and
"
if
(
$names
&&
$go_terms
);
$str
.=
"
GO terms
"
if
(
$go_terms
);
foreach
my
$gene_id
(
@
{
$from_ga
->
list_dbIDs
})
{
print
"
Projecting
$str
from
$from_species
to
$to_species
\n
";
my
$
gene
=
$from_ga
->
f
et
ch_by_dbID
(
$gene_id
);
next
unless
(
$gene
->
biotype
eq
"
protein_coding
"
);
my
$
to_ga
=
Bio::EnsEMBL::
Registry
->
g
et
_adaptor
(
$to_species
,
'
core
',
'
Gene
'
);
my
$to_dbea
=
Bio::EnsEMBL::
Registry
->
get_adaptor
(
$to_species
,
'
core
',
'
DBEntry
'
);
my
$member
=
$ma
->
fetch_by_source_stable_id
("
ENSEMBLGENE
",
$gene
->
stable_id
);
next
unless
(
defined
$member
);
my
$mlss
=
$mlssa
->
fetch_by_method_link_type_registry_aliases
(
$method_link_type
,
[
$from_species
,
$to_species
]);
my
$homologies
=
$ha
->
fetch_all_by_Member_MethodLinkSpeciesSet
(
$member
,
$mlss
);
# Get all genes, find homologies, set xrefs
project_homologies
(
$homologies
);
foreach
my
$gene_id
(
@
{
$from_ga
->
list_dbIDs
})
{
}
my
$gene
=
$from_ga
->
fetch_by_dbID
(
$gene_id
);
# next unless ($gene->biotype eq "protein_coding");
my
$member
=
$ma
->
fetch_by_source_stable_id
("
ENSEMBLGENE
",
$gene
->
stable_id
);
next
unless
(
defined
$member
);
print
"
Modified
$changed
genes, didn't modify
$unchanged
genes
\n
";
my
$homologies
=
$ha
->
fetch_all_by_Member_MethodLinkSpeciesSet
(
$member
,
$mlss
);
project_homologies
(
$homologies
,
$to_ga
,
$to_dbea
,
$names
,
$go_terms
,
$ma
);
}
print
"
Projected
$changed_names
gene names, out of a total of
"
.
scalar
(
@
{
$from_ga
->
list_dbIDs
})
.
"
\n
"
if
(
$names
);
print
"
Projected
$changed_go_xrefs
GO terms
\n
"
if
(
$go_terms
);
}
# ----------------------------------------------------------------------
# Project homologies from first to subsequent species
sub
project_homologies
()
{
my
$homologies
=
shift
;
my
(
$homologies
,
$to_ga
,
$to_dbea
,
$names
,
$go_terms
,
$ma
)
=
@_
;
my
$changed
=
0
;
foreach
my
$homology
(
@
{
$homologies
})
{
my
@mas
=
@
{
$homology
->
get_all_Member_Attribute
};
my
(
$from_member
,
$from_attribute
)
=
@
{
$mas
[
0
]};
my
(
$to_member
,
$to_attribute
)
=
@
{
$mas
[
1
]};
# ----------------------------------------
# Display names and descriptions
if
(
$names
)
{
$changed_names
+=
project_display_names
(
$to_ga
,
$to_dbea
,
$ma
,
$from_member
,
$to_member
);
}
# ----------------------------------------
# GO terms
if
(
$go_terms
)
{
$changed_go_xrefs
+=
project_go_terms
(
$to_ga
,
$to_dbea
,
$ma
,
$from_attribute
,
$to_attribute
);
}
# ----------------------------------------
}
return
$changed
;
}
foreach
my
$homology
(
@
{
$homologies
})
{
# --------------------------------------------------------------------------------
my
@mas
=
@
{
$homology
->
get_all_Member_Attribute
};
my
(
$from_member
,
$from_attribute
)
=
@
{
$mas
[
0
]};
my
(
$to_member
,
$to_attribute
)
=
@
{
$mas
[
1
]};
sub
project_display_names
{
my
$to_g
ene
=
$to_ga
->
fetch_by_stable_id
(
$to_member
->
stable_id
())
;
my
(
$to_g
a
,
$to_dbea
,
$ma
,
$from_member
,
$to_member
)
=
@_
;
# if no display name set, do the projection
if
(
!
$to_gene
->
external_name
())
{
my
$to_gene
=
$to_ga
->
fetch_by_stable_id
(
$to_member
->
stable_id
());
my
$from_gene
=
$from_ga
->
fetch_by_stable_id
(
$from_member
->
stable_id
());
my
$dbEntry
=
$from_gene
->
display_xref
();
my
$changed
=
0
;
# TODO only do this for certain types of DBEntry?
# if no display name set, do the projection
if
(
!
$to_gene
->
external_name
())
{
if
(
$dbEntry
)
{
my
$from_gene
=
$from_ga
->
fetch_by_stable_id
(
$from_member
->
stable_id
());
my
$dbEntry
=
$from_gene
->
display_xref
();
# Modify the dbEntry to indicate it's not from this species
my
$txt
=
"
[from
$from_species
gene
"
.
$from_gene
->
stable_id
()
.
"
]
";
$dbEntry
->
display_id
(
$dbEntry
->
display_id
()
.
$txt
);
# TODO only do this for certain types of DBEntry?
# Add the xref to the "to" gene, set its status to "KNOWN", modify the description and update display_xref
$to_gene
->
add_DBEntry
(
$dbEntry
);
$to_gene
->
status
("
KNOWN
");
$to_gene
->
description
(
$from_gene
->
description
()
.
$txt
)
if
(
$from_gene
->
description
());
$to_gene
->
display_xref
(
$dbEntry
);
if
(
$dbEntry
)
{
if
(
$print
)
{
print
$to_gene
->
stable_id
()
.
"
-->
"
.
$dbEntry
->
display
_id
()
.
"
\n
";
}
# Modify the dbEntry to indicate it's not from this species
my
$txt
=
"
[from
$from_species
gene
"
.
$from_gene
->
stable
_id
()
.
"
]
";
$dbEntry
->
display_id
(
$dbEntry
->
display_id
()
.
$txt
);
# store - need to add the DBEntry and also update the gene display xref id
if
(
$store
)
{
$to_dbea
->
store
(
$dbEntry
,
$to_gene
,
'
Gene
');
$to_ga
->
update
(
$to_gene
);
}
# Add the xref to the "to" gene, set its status to "KNOWN", modify the description and update display_xref
$to_gene
->
add_DBEntry
(
$dbEntry
);
$to_gene
->
status
("
KNOWN
");
$to_gene
->
description
(
$from_gene
->
description
()
.
$txt
)
if
(
$from_gene
->
description
());
$to_gene
->
display_xref
(
$dbEntry
);
print
$to_gene
->
stable_id
()
.
"
-->
"
.
$dbEntry
->
display_id
()
.
"
\n
"
if
(
$print
);
$changed
++
;
}
# store - need to add the DBEntry and also update the gene display xref id
if
(
$store
)
{
$to_dbea
->
store
(
$dbEntry
,
$to_gene
,
'
Gene
');
$to_ga
->
update
(
$to_gene
);
}
}
else
{
$changed
++
;
$unchanged
++
;
}
}
}
}
return
$changed
;
}
# --------------------------------------------------------------------------------
sub
project_go_terms
{
my
(
$to_ga
,
$to_dbea
,
$ma
,
$from_attribute
,
$to_attribute
)
=
@_
;
my
$changed
=
0
;
# GO xrefs are linked to translations, not genes
my
$from_translation
=
$ma
->
fetch_by_dbID
(
$from_attribute
->
peptide_member_id
())
->
get_Translation
();
my
$to_translation
=
$ma
->
fetch_by_dbID
(
$to_attribute
->
peptide_member_id
())
->
get_Translation
();
my
$to_go_xrefs
=
$to_translation
->
get_all_DBEntries
();
foreach
my
$dbEntry
(
@
{
$from_translation
->
get_all_DBEntries
()})
{
next
if
$dbEntry
->
dbname
()
ne
"
GO
";
# check that each from GO term isn't already projected, then project it
next
if
go_xref_exists
(
$dbEntry
,
$to_go_xrefs
);
# linkage types etc should be taken care of automatically
$to_translation
->
add_DBEntry
(
$dbEntry
);
my
$txt
=
"
[from
$from_species
translation
"
.
$from_translation
->
stable_id
()
.
"
]
";
$dbEntry
->
display_id
(
$dbEntry
->
display_id
()
.
$txt
);
print
$to_translation
->
stable_id
()
.
"
-->
"
.
$dbEntry
->
display_id
()
.
"
\n
"
if
(
$print
);
$to_dbea
->
store
(
$dbEntry
,
$to_translation
,
'
Translation
')
if
(
$store
);
$changed
++
;
}
return
$changed
;
}
# --------------------------------------------------------------------------------
sub
go_xref_exists
{
my
(
$dbEntry
,
$to_go_xrefs
)
=
@_
;
foreach
my
$xref
(
@
{
$to_go_xrefs
})
{
if
(
$xref
->
dbname
eq
$dbEntry
->
dbname
()
&&
$xref
->
primary_id
eq
$dbEntry
->
primary_id
())
{
return
1
;
}
}
return
0
;
}
# ----------------------------------------------------------------------
...
...
@@ -130,9 +245,9 @@ sub usage {
print
<<
"
EOF
";
Sets
display_xref_ids
for
novel
genes
in
the
"
to
"
database
based
on
their
orthologs
in
the
"
from
"
database
.
Orthology
relationships
are
read
from
a
Compara
database
.
Sets
display_xref_ids
and
/
or
GO
terms
for
novel
genes
in
the
"
to
"
database
based
on
their
orthologs
in
the
"
from
"
database
.
Orthology
relationships
are
read
from
a
Compara
database
.
perl
project_display_xrefs
.
pl
{
options
}
...
...
@@ -148,8 +263,15 @@ sub usage {
--
from
string
The
species
to
use
as
the
source
(
a
Bio::EnsEMBL::
Registry
alias
,
defined
in
config
file
)
--
to
string
The
target
species
--
to
string
The
target
species
.
(
a
Bio::EnsEMBL::
Registry
alias
,
defined
in
config
file
)
More
than
one
target
species
can
be
specified
by
using
several
--
to
arguments
or
a
comma
-
separated
list
,
e
.
g
.
-
from
human
-
to
dog
-
to
opossum
or
--
to
dog
,
opossum
--
names
Project
display
names
and
descriptions
.
--
go_terms
Project
GO
terms
.
[
--
print
]
Print
details
of
projection
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment