Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ensembl-gh-mirror
ensembl
Commits
ddf36962
Commit
ddf36962
authored
Dec 18, 2002
by
Arne Stabenau
Browse files
BaseAlignFeature split on contig boundary now looses split codon alignment
parent
e7fec65c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
229 deletions
+77
-229
modules/Bio/EnsEMBL/BaseAlignFeature.pm
modules/Bio/EnsEMBL/BaseAlignFeature.pm
+52
-6
modules/Bio/EnsEMBL/Root.pm
modules/Bio/EnsEMBL/Root.pm
+2
-0
modules/t/dnaPepAlignFeature.t
modules/t/dnaPepAlignFeature.t
+23
-223
No files found.
modules/Bio/EnsEMBL/BaseAlignFeature.pm
View file @
ddf36962
...
...
@@ -911,6 +911,7 @@ sub _transform_feature_to_RawContig{
my
@out
;
my
(
$hit_start
,
$hit_end
);
my
$codon_unused_bases
=
0
;
if
(
scalar
(
@mapped
)
>
1
)
{
#The feature needs to be mapped accross multiple contigs
...
...
@@ -931,14 +932,58 @@ sub _transform_feature_to_RawContig{
#caculate query and hit length for each portion of the split feature
#may need to round hit length to avoid 'partial' peptide
# decision of Michele to not cover split codons
my
$query_length
=
(
$mapped
[
$i
]
->
end
-
$mapped
[
$i
]
->
start
+
1
);
my
$query_start
=
$mapped
[
$i
]
->
start
();
my
$query_end
=
$mapped
[
$i
]
->
end
();
my
$hit_length
;
if
(
$self
->
_query_unit
==
$self
->
_hit_unit
){
# DnaDna and PepPep case
$hit_length
=
$query_length
;
}
elsif
(
$self
->
_query_unit
>
$self
->
_hit_unit
){
my
$tmp
=
(
$query_length
/
$self
->
_query_unit
);
$hit_length
=
sprintf
"
%.0f
",
$tmp
;
#round value up or down
}
elsif
(
$self
->
_hit_unit
>
$self
->
_query_unit
){
}
elsif
(
$self
->
_query_unit
>
$self
->
_hit_unit
){
# DnaPepAlign case
# my $tmp = ($query_length/$self->_query_unit);
# $hit_length = sprintf "%.0f", $tmp; #round value up or down
$hit_length
=
int
((
$query_length
-
$codon_unused_bases
)
/
$self
->
_query_unit
()
);
if
(
$codon_unused_bases
)
{
if
(
$feature
->
hstrand
()
==
1
)
{
$hit_start
++
;
}
else
{
$hit_end
--
;
}
}
my
$used_bases
=
$query_length
-
$codon_unused_bases
-
$hit_length*$self
->
_query_unit
();
if
(
$mapped
[
$i
]
->
strand
()
==
-
1
)
{
$query_end
-=
$codon_unused_bases
;
$query_start
+=
$used_bases
;
}
else
{
$query_start
+=
$codon_unused_bases
;
$query_end
-=
$used_bases
;
}
# new rest at the end ...
if
(
$used_bases
)
{
$codon_unused_bases
=
3
-
$used_bases
;
}
else
{
$codon_unused_bases
=
0
;
}
}
elsif
(
$self
->
_hit_unit
>
$self
->
_query_unit
){
# PepDnaAlign case (rare)
my
$tmp
=
(
$query_length*$self
->
_hit_unit
);
$hit_length
=
sprintf
"
%.0f
",
$tmp
;
#round value up or down
}
...
...
@@ -946,6 +991,7 @@ sub _transform_feature_to_RawContig{
if
(
$hit_length
==
0
){
next
SPLIT
;
}
if
(
$feature
->
hstrand
()
==
1
)
{
$hit_end
=
(
$hit_start
+
$hit_length
)
-
1
;
}
else
{
...
...
@@ -957,8 +1003,8 @@ sub _transform_feature_to_RawContig{
#create the new feature
my
$new_feature
=
Bio::EnsEMBL::
FeaturePair
->
new
;
$new_feature
->
start
(
$
mapped
[
$i
]
->
start
);
$new_feature
->
end
(
$
mapped
[
$i
]
->
end
);
$new_feature
->
start
(
$
query_
start
);
$new_feature
->
end
(
$
query_
end
);
$new_feature
->
strand
(
$mapped
[
$i
]
->
strand
);
$new_feature
->
score
(
$feature
->
score
);
$new_feature
->
percent_id
(
$feature
->
percent_id
);
...
...
modules/Bio/EnsEMBL/Root.pm
View file @
ddf36962
...
...
@@ -95,6 +95,8 @@ sub warn{
my
(
$self
,
$string
)
=
@_
;
my
$verbose
=
$self
->
verbose
;
$verbose
=
0
unless
defined
$verbose
;
if
(
$verbose
==
2
)
{
$self
->
throw
(
$string
);
...
...
modules/t/dnaPepAlignFeature.t
View file @
ddf36962
...
...
@@ -3,7 +3,7 @@ use lib 't';
BEGIN
{
$|
=
1
;
use
Test
;
plan
tests
=>
6
3
;
plan
tests
=>
3
9
;
}
use
MultiTestDB
;
...
...
@@ -11,6 +11,11 @@ use Bio::EnsEMBL::DnaPepAlignFeature;
use
Bio::EnsEMBL::
SeqFeature
;
use
Bio::EnsEMBL::
RawContig
;
use
TestUtils
qw (
debug
test_getter_setter
);
# switch on the debug prints
our
$verbose
=
0
;
my
(
$CHR
,
$START
,
$END
)
=
('
20
',
30_363_615
,
30_475_000
);
my
$CTG_BOUNDARY
=
62877
;
...
...
@@ -143,12 +148,12 @@ ok($f);
#
@feats
=
();
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
2
);
$fp
->
end
(
$CTG_BOUNDARY
);
$fp
->
start
(
$CTG_BOUNDARY
-
5
);
$fp
->
end
(
$CTG_BOUNDARY
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
10
5
);
$fp
->
hstart
(
10
4
);
$fp
->
hend
(
105
);
$fp
->
hstrand
(
1
);
$fp
->
hseqname
('
dummy-hid
');
...
...
@@ -180,7 +185,7 @@ push(@feats,$fp);
$dnaf
=
Bio::EnsEMBL::
DnaPepAlignFeature
->
new
(
-
features
=>
\
@feats
);
ok
(
$dnaf
);
ok
(
$dnaf
->
cigar_string
eq
'
3
M3I6M6D3M
');
ok
(
$dnaf
->
cigar_string
eq
'
6
M3I6M6D3M
');
ok
(
$dnaf
->
validate
||
1
);
#validate doesn't return true but throws on fail
@dnafs
=
$dnaf
->
transform
;
...
...
@@ -189,63 +194,10 @@ ok($dnafs[0]->validate || 1);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
# 22-27 create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (+ve strand, -ve hitstrand)
#
@feats
=
();
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
2
);
$fp
->
end
(
$CTG_BOUNDARY
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
108
);
$fp
->
hend
(
108
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
4
);
$fp
->
end
(
$CTG_BOUNDARY
+
9
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
106
);
$fp
->
hend
(
107
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
10
);
$fp
->
end
(
$CTG_BOUNDARY
+
12
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
103
);
$fp
->
hend
(
103
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$dnaf
=
Bio::EnsEMBL::
DnaPepAlignFeature
->
new
(
-
features
=>
\
@feats
);
ok
(
$dnaf
);
ok
(
$dnaf
->
cigar_string
eq
'
3M3I6M6D3M
');
ok
(
$dnaf
->
validate
||
1
);
#validate doesn't return true but throws on fail
my
@dnafs
=
$dnaf
->
transform
;
ok
(
scalar
(
@dnafs
)
==
2
);
ok
(
$dnafs
[
0
]
->
validate
||
1
);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
# 2
8-33
create a dnaalign feature on a slice across a contig boundary
# 2
2-27
create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (-ve strand, +ve hitstrand)
#
...
...
@@ -296,65 +248,21 @@ ok($dnaf->validate || 1); #validate doesn't return true but throws on fail
@dnafs
=
$dnaf
->
transform
;
ok
(
scalar
(
@dnafs
)
==
2
);
ok
(
$dnafs
[
0
]
->
validate
||
1
);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
# 34-39 create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (-ve strand, -ve hitstrand)
#
@feats
=
();
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
3
);
$fp
->
end
(
$CTG_BOUNDARY
+
5
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
108
);
$fp
->
hend
(
108
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
6
);
$fp
->
end
(
$CTG_BOUNDARY
-
1
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
106
);
$fp
->
hend
(
107
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
debug
(
"
Feature 0 dump
"
);
while
(
my
(
$k
,
$v
)
=
each
%
{
$dnafs
[
0
]}
)
{
debug
(
"
->
"
.
$k
.
"
=
"
.
$v
);
}
ok
(
$dnafs
[
0
]
->
validate
||
1
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
9
);
$fp
->
end
(
$CTG_BOUNDARY
-
7
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
103
);
$fp
->
hend
(
103
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
debug
(
"
Feature 1 dump
"
);
while
(
my
(
$k
,
$v
)
=
each
%
{
$dnafs
[
1
]}
)
{
debug
(
"
->
"
.
$k
.
"
=
"
.
$v
);
}
ok
(
$dnafs
[
1
]
->
validate
||
1
);
$dnaf
=
Bio::EnsEMBL::
DnaPepAlignFeature
->
new
(
-
features
=>
\
@feats
);
ok
(
$dnaf
);
ok
(
$dnaf
->
cigar_string
eq
'
3M3I6M6D3M
');
ok
(
$dnaf
->
validate
||
1
);
#validate doesn't return true but throws on fail
@dnafs
=
$dnaf
->
transform
;
ok
(
scalar
(
@dnafs
)
==
2
);
ok
(
$dnafs
[
0
]
->
validate
||
1
);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
...
...
@@ -368,7 +276,7 @@ $slice = $slice->invert;
#
#
40-45
create a dnaalign feature on a slice across a contig boundary
#
28-33
create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (+ve strand, +ve hitstrand)
#
...
...
@@ -420,63 +328,9 @@ ok($dnafs[0]->validate || 1);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
# 46-51 create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (+ve strand, -ve hitstrand)
#
@feats
=
();
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
2
);
$fp
->
end
(
$CTG_BOUNDARY
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
108
);
$fp
->
hend
(
108
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
4
);
$fp
->
end
(
$CTG_BOUNDARY
+
9
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
106
);
$fp
->
hend
(
107
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
10
);
$fp
->
end
(
$CTG_BOUNDARY
+
12
);
$fp
->
strand
(
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
103
);
$fp
->
hend
(
103
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$dnaf
=
Bio::EnsEMBL::
DnaPepAlignFeature
->
new
(
-
features
=>
\
@feats
);
ok
(
$dnaf
);
ok
(
$dnaf
->
cigar_string
eq
'
3M3I6M6D3M
');
ok
(
$dnaf
->
validate
||
1
);
#validate doesn't return true but throws on fail
@dnafs
=
$dnaf
->
transform
;
ok
(
scalar
(
@dnafs
)
==
2
);
ok
(
$dnafs
[
0
]
->
validate
||
1
);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
#
#
52-57
create a dnaalign feature on a slice across a contig boundary
#
34-39
create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (-ve strand, +ve hitstrand)
#
...
...
@@ -532,57 +386,3 @@ ok($dnafs[1]->validate || 1);
#
# 58-63 create a dnaalign feature on a slice across a contig boundary
# and convert to raw contig coordinates
# (-ve strand, -ve hitstrand)
#
@feats
=
();
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
+
3
);
$fp
->
end
(
$CTG_BOUNDARY
+
5
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
hstart
(
108
);
$fp
->
hend
(
108
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
6
);
$fp
->
end
(
$CTG_BOUNDARY
-
1
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
106
);
$fp
->
hend
(
107
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$fp
=
new
Bio::EnsEMBL::
FeaturePair
;
$fp
->
start
(
$CTG_BOUNDARY
-
9
);
$fp
->
end
(
$CTG_BOUNDARY
-
7
);
$fp
->
strand
(
-
1
);
$fp
->
score
(
10
);
$fp
->
contig
(
$slice
);
$fp
->
seqname
(
1
);
$fp
->
hstart
(
103
);
$fp
->
hend
(
103
);
$fp
->
hstrand
(
-
1
);
$fp
->
hseqname
('
dummy-hid
');
push
(
@feats
,
$fp
);
$dnaf
=
Bio::EnsEMBL::
DnaPepAlignFeature
->
new
(
-
features
=>
\
@feats
);
ok
(
$dnaf
);
ok
(
$dnaf
->
cigar_string
eq
'
3M3I6M6D3M
');
ok
(
$dnaf
->
validate
||
1
);
#validate doesn't return true but throws on fail
@dnafs
=
$dnaf
->
transform
;
ok
(
scalar
(
@dnafs
)
==
2
);
ok
(
$dnafs
[
0
]
->
validate
||
1
);
ok
(
$dnafs
[
1
]
->
validate
||
1
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment