Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
a0cde95e
Commit
a0cde95e
authored
23 years ago
by
Eduardo Eyras
Browse files
Options
Downloads
Patches
Plain Diff
same changes as in branch 121
parent
8d844094
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
modules/Bio/EnsEMBL/Utils/GeneComparison.pm
+86
-11
86 additions, 11 deletions
modules/Bio/EnsEMBL/Utils/GeneComparison.pm
with
86 additions
and
11 deletions
modules/Bio/EnsEMBL/Utils/GeneComparison.pm
+
86
−
11
View file @
a0cde95e
...
...
@@ -255,6 +255,8 @@ sub cluster_Genes {
# create a new cluster
my
$cluster
=
Bio::EnsEMBL::Utils::
GeneCluster
->
new
();
my
$cluster_count
=
1
;
my
@clusters
;
# pass in the types we're using
my
(
$types1
,
$types2
)
=
$self
->
gene_Types
;
...
...
@@ -262,7 +264,7 @@ sub cluster_Genes {
# put the first gene into these cluster
$cluster
->
put_Genes
(
$sorted_genes
[
0
]
);
push
(
@clusters
,
$cluster
);
$self
->
gene_Clusters
(
$cluster
);
# loop over the rest of the genes
...
...
@@ -277,29 +279,48 @@ sub cluster_Genes {
next
LOOP1
;
}
}
# if not in this cluster compare to the ($limit) previous clusters
my
$limit
=
6
;
if
(
$found
==
0
&&
$cluster_count
>
1
){
my
$lookup
=
1
;
while
(
!
(
$cluster_count
<=
$lookup
)
&&
!
(
$lookup
>
$limit
)
){
#print STDERR "cluster_count: $cluster_count, looking at ".($cluster_count - $lookup)."\n";
my
$previous_cluster
=
$clusters
[
$cluster_count
-
1
-
$lookup
];
foreach
my
$gene_in_cluster
(
$previous_cluster
->
get_Genes
){
if
(
_compare_Genes
(
$sorted_genes
[
$c
],
$gene_in_cluster
)
){
$previous_cluster
->
put_Genes
(
$sorted_genes
[
$c
]
);
$found
=
1
;
next
LOOP1
;
}
}
$lookup
++
;
}
}
if
(
$found
==
0
){
# if not-clustered create a new GeneCluser
$cluster
=
new
Bio::EnsEMBL::Utils::
GeneCluster
;
$cluster
->
gene_Types
(
$types1
,
$types2
);
$cluster
->
put_Genes
(
$sorted_genes
[
$c
]
);
$cluster_count
++
;
push
(
@clusters
,
$cluster
);
$self
->
gene_Clusters
(
$cluster
);
}
}
# put all unclustered genes (annotated and predicted) into one separate array
my
$time2
=
time
();
print
STDERR
"
time for clustering:
"
.
(
$time2
-
$time1
)
.
"
\n
";
my
@clusters
;
my
@
new_
clusters
;
foreach
my
$cl
(
$self
->
gene_Clusters
){
if
(
$cl
->
get_Gene_Count
==
1
){
$self
->
unclustered_Genes
(
$cl
);
# this push the cluster into array @{ $self->{'_unclustered_genes'} }
}
else
{
push
(
@clusters
,
$cl
);
push
(
@
new_
clusters
,
$cl
);
}
}
$self
->
flush_gene_Clusters
;
$self
->
gene_Clusters
(
@clusters
);
$self
->
gene_Clusters
(
@
new_
clusters
);
my
@unclustered
=
$self
->
unclustered_Genes
;
return
@clusters
;
return
@
new_
clusters
;
}
######################################################################################
...
...
@@ -476,7 +497,6 @@ sub cluster_Transcripts {
my
(
$self
,
@transcripts
)
=
@_
;
if
(
!
defined
(
@transcripts
)
){
my
@transcripts
=
();
my
@genes
=
(
@
{
$self
->
{'
_gene_array1
'}
},
@
{
$self
->
{'
_gene_array2
'}
}
);
foreach
my
$gene
(
@genes
){
my
@more_transcripts
=
$gene
->
each_Transcript
;
...
...
@@ -489,18 +509,51 @@ sub cluster_Transcripts {
my
%start_table
;
my
$i
=
0
;
foreach
my
$transcript
(
@transcripts
){
$start_table
{
$i
}
=
$transcript
->
start_exon
->
start
;
my
$start
;
my
$seqname
;
my
@exons
=
$transcript
->
get_all_Exons
;
@exons
=
sort
{
$a
->
start
<=>
$b
->
start
}
@exons
;
if
(
$exons
[
0
]
->
start
>
$exons
[
0
]
->
end
){
$start
=
$exons
[
0
]
->
end
;
}
else
{
$start
=
$exons
[
0
]
->
start
;
}
$start
=
$transcript
->
start_exon
->
start
;
$start_table
{
$i
}
=
$start
;
$i
++
;
# if some exons in the transcript fall outside the vc, they will not be converted to vc coordinates
foreach
my
$exon
(
@exons
){
unless
(
$seqname
){
$seqname
=
$exon
->
seqname
;
}
unless
(
$seqname
eq
$exon
->
seqname
){
$self
->
warn
("
transcript
"
.
$transcript
->
stable_id
.
"
(
"
.
$transcript
->
dbID
.
"
) is partly
"
.
"
outside the contig
");
last
;
}
}
}
my
@sorted_transcripts
=
();
foreach
my
$pos
(
sort
{
$start_table
{
$a
}
<=>
$start_table
{
$b
}
}
keys
%start_table
){
push
(
@sorted_transcripts
,
$transcripts
[
$pos
]);
}
@transcripts
=
@sorted_transcripts
;
# test
foreach
my
$tran
(
@transcripts
){
print
STDERR
"
\n
transcript:
"
.
$tran
->
stable_id
.
"
internal_id:
"
.
$tran
->
dbID
.
"
\n
";
foreach
my
$exon
(
$tran
->
get_all_Exons
){
print
STDERR
$exon
->
seqname
.
"
"
.
$exon
->
start
.
"
:
"
.
$exon
->
end
.
"
\n
";
}
print
STDERR
"
\n
";
}
my
$time1
=
time
();
my
@clusters
;
# create a new cluster
my
$cluster
=
Bio::EnsEMBL::Utils::
TranscriptCluster
->
new
();
my
$cluster_count
=
1
;
# put the first transcript into these cluster
$cluster
->
put_Transcripts
(
$sorted_transcripts
[
0
]
);
...
...
@@ -519,13 +572,35 @@ sub cluster_Transcripts {
next
LOOP1
;
}
}
if
(
$found
==
0
){
# if not-clustered create a new TranscriptCluser
# if not in this cluster compare to the ($limit) previous clusters
my
$limit
=
6
;
if
(
$found
==
0
&&
$cluster_count
>
1
){
my
$lookup
=
1
;
while
(
!
(
$cluster_count
<=
$lookup
)
&&
!
(
$lookup
>
$limit
)
){
print
STDERR
"
cluster_count:
$cluster_count
, looking at
"
.
(
$cluster_count
-
$lookup
)
.
"
\n
";
my
$previous_cluster
=
$clusters
[
$cluster_count
-
1
-
$lookup
];
foreach
my
$t_in_cluster
(
$previous_cluster
->
get_Transcripts
){
if
(
_compare_Transcripts
(
$sorted_transcripts
[
$c
],
$t_in_cluster
)
){
$previous_cluster
->
put_Transcripts
(
$sorted_transcripts
[
$c
]
);
$found
=
1
;
next
LOOP1
;
}
}
$lookup
++
;
}
}
# if not-clustered create a new TranscriptCluser
if
(
$found
==
0
){
$cluster
=
new
Bio::EnsEMBL::Utils::
TranscriptCluster
;
$cluster
->
put_Transcript
(
$sorted_transcripts
[
$c
]
);
$cluster
->
put_Transcripts
(
$sorted_transcripts
[
$c
]
);
$cluster_count
++
;
push
(
@clusters
,
$cluster
);
# $self->transcript_Clusters( $cluster );
}
}
my
$time2
=
time
();
my
$time
=
$time2
-
$time1
;
print
STDERR
"
clustering time:
"
.
$time
.
"
\n
";
return
@clusters
;
}
...
...
@@ -914,11 +989,11 @@ sub find_missing_coding_Exons{
}
print
STDERR
scalar
(
@unpaired
)
.
"
transcripts unpaired
\n
";
foreach
my
$tran
(
@unpaired
){
print
STDERR
$tran
->
stable_id
.
"
\n
";
print
STDERR
"
stable_id:
"
.
$tran
->
stable_id
.
"
internal_id:
"
.
$tran
->
dbID
.
"
\n
";
}
print
STDERR
scalar
(
@doubled
)
.
"
transcripts repeated
\n
";
foreach
my
$tran
(
@doubled
){
print
STDERR
$tran
->
stable_id
.
"
\n
";
print
STDERR
"
stable_id:
"
.
$tran
->
stable_id
.
"
internal_id:
"
.
$tran
->
dbID
.
"
\n
";
}
# we return the hash with the arrays of transcript pairs as values, and
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment