Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
2f258ca6
Commit
2f258ca6
authored
Feb 25, 2008
by
Andreas Kusalananda Kähäri
Browse files
Transplanting get_bins() method from the Attic, but it won't work right now.
parent
ba93cf5b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
206 additions
and
0 deletions
+206
-0
modules/Bio/EnsEMBL/Collection.pm
modules/Bio/EnsEMBL/Collection.pm
+206
-0
No files found.
modules/Bio/EnsEMBL/Collection.pm
View file @
2f258ca6
...
...
@@ -61,4 +61,210 @@ sub _create_feature_fast {
return
$feature
;
}
# Maps every binning method name accepted by get_bins() to a small
# integer code.  Names that share a code are aliases of each other, so
# get_bins() only has to branch on the code, not on the name.
our %VALID_BINNING_METHODS = (
  'count'            => 0,
  'density'          => 0,    # Same as 'count'.
  'indices'          => 1,
  'index'            => 1,    # Same as 'indices'.
  'entries'          => 2,
  'entry'            => 2,    # Same as 'entries'.
  'fractional_count' => 3,
  'weight'           => 3,    # Same as 'fractional_count'.
  'coverage'         => 4,
);
# get_bins
#
# Distributes the entries of a populated collection over NBINS equally
# wide bins that together span the collection's slice.
#
# Arguments (passed through rearrange() as NBINS and METHOD;
# presumably given by callers as -NBINS / -METHOD -- confirm against
# the rearrange() helper in use):
#   NBINS   Number of bins, must be defined and greater than zero.
#   METHOD  Optional binning method name, one of the keys of
#           %VALID_BINNING_METHODS.  Defaults to 'count'.
#
# Binning methods:
#   count / density            Number of entries overlapping each bin.
#   indices / index            For each bin, array ref of the positions
#                              (within entries()) of overlapping entries.
#   entries / entry            For each bin, array ref of the
#                              overlapping entries themselves.
#   fractional_count / weight  Each entry contributes a total of 1,
#                              split over the bins in proportion to the
#                              part of the entry that falls in each bin.
#   coverage                   Fraction of each bin's positions covered
#                              by at least one entry.
#
# Entries are clipped to the slice before binning: the part of an entry
# lying outside the slice is ignored, and entries lying entirely
# outside the slice are skipped.  This keeps every bin index within
# 0 .. NBINS-1 (an unclipped negative index would silently address the
# array from its end).
#
# Returns a reference to an array of NBINS elements.  Bins that no
# entry touches are left undefined.
#
# Throws (via throw()) if the collection has not been populated, if
# NBINS is missing or not positive, or if METHOD is not a valid name.
sub get_bins {
  my $this = shift;
  my ( $nbins, $method_name ) = rearrange( [ 'NBINS', 'METHOD' ], @_ );

  if ( !$this->is_populated() ) {
    throw(   'Can not bin a feature collection '
           . 'without first having called populate()' );
  }

  if ( !defined($nbins) ) {
    throw('Missing NBINS argument');
  }
  elsif ( $nbins <= 0 ) {
    throw('Negative or zero NBINS argument');
  }

  $method_name ||= 'count';
  if ( !exists( $VALID_BINNING_METHODS{$method_name} ) ) {
    throw(
      sprintf( "Invalid binning method '%s', valid methods are: %s",
               $method_name,
               join( ', ', sort( keys(%VALID_BINNING_METHODS) ) ) ) );
  }
  my $method = $VALID_BINNING_METHODS{$method_name};

  my $slice        = $this->slice();
  my $slice_start  = $slice->start();
  my $slice_length = $slice->end() - $slice_start + 1;

  # May be fractional when the slice length is not a multiple of NBINS.
  my $bin_length = $slice_length/$nbins;

  my @bins = (undef) x $nbins;

  # Used by the 'coverage' method only.  Each element is either
  # undefined (nothing covered yet), the plain scalar 1 (bin completely
  # covered), or an array ref with a 1 at every covered position
  # (offset from the start of the bin).
  my @bin_masks;

  my $entries = $this->entries();

ENTRY:
  for my $entry_index ( 0 .. $#{$entries} ) {
    my $entry = $entries->[$entry_index];

    # Zero-based coordinates relative to the start of the slice.
    my $feature_start = $entry->[ENTRY_SEQREGIONSTART] - $slice_start;
    my $feature_end   = $entry->[ENTRY_SEQREGIONEND] - $slice_start;

    # Nothing to bin if the entry does not overlap the slice at all.
    next ENTRY
      if ( $feature_end < 0 || $feature_start >= $slice_length );

    # Clip the entry to the slice so that no bin index can fall outside
    # of @bins.
    if ( $feature_start < 0 ) { $feature_start = 0 }
    if ( $feature_end >= $slice_length ) {
      $feature_end = $slice_length - 1;
    }

    my $start_bin = int( $feature_start/$bin_length );
    my $end_bin   = int( $feature_end/$bin_length );

    # Belt and braces: never index past the last allocated bin.
    if ( $start_bin > $#bins ) { $start_bin = $#bins }
    if ( $end_bin > $#bins )   { $end_bin   = $#bins }

    if ( $method == 0 ) {
      # For 'count' and 'density'.
      for my $bin_index ( $start_bin .. $end_bin ) {
        ++$bins[$bin_index];
      }
    }
    elsif ( $method == 1 ) {
      # For 'indices' and 'index'.
      for my $bin_index ( $start_bin .. $end_bin ) {
        push( @{ $bins[$bin_index] }, $entry_index );
      }
    }
    elsif ( $method == 2 ) {
      # For 'entries' and 'entry'.
      for my $bin_index ( $start_bin .. $end_bin ) {
        push( @{ $bins[$bin_index] }, $entry );
      }
    }
    elsif ( $method == 3 ) {
      # For 'fractional_count' and 'weight'.
      if ( $start_bin == $end_bin ) {
        ++$bins[$start_bin];
      }
      else {
        # Length of the part of the entry that lies inside the slice;
        # the three contributions below add up to exactly 1.
        my $feature_length = $feature_end - $feature_start + 1;

        # The first bin: from the start of the entry to the end of the
        # bin.
        $bins[$start_bin] +=
          ( ( $start_bin + 1 )*$bin_length - $feature_start )/
          $feature_length;

        # The intermediate bins (if there are any) are fully spanned.
        for my $bin_index ( $start_bin + 1 .. $end_bin - 1 ) {
          $bins[$bin_index] += $bin_length/$feature_length;
        }

        # The last bin: from the start of the bin to the end of the
        # entry.
        $bins[$end_bin] +=
          ( $feature_end - $end_bin*$bin_length + 1 )/$feature_length;
      }
    }
    elsif ( $method == 4 ) {
      # For 'coverage'.

      # Unless $start_bin is already completely covered, mask it from
      # the start of the entry to the end of the bin, or to the end of
      # the entry (whichever occurs first).
      if ( !defined( $bin_masks[$start_bin] )
           || ref( $bin_masks[$start_bin] ) )
      {
        my $bin_start = int( $start_bin*$bin_length );
        my $bin_end   = int( ( $start_bin + 1 )*$bin_length - 1 );

        for ( my $pos = $feature_start;
              $pos <= $bin_end && $pos <= $feature_end;
              ++$pos )
        {
          $bin_masks[$start_bin][ $pos - $bin_start ] = 1;
        }
      }

      # Bins strictly between $start_bin and $end_bin are completely
      # covered; replace any partial mask with the scalar marker.
      for my $bin_index ( $start_bin + 1 .. $end_bin - 1 ) {
        $bin_masks[$bin_index] = 1;
      }

      # Unless $end_bin is the same bin as $start_bin or is already
      # completely covered, mask it from the start of the bin to the
      # end of the entry, or to the end of the bin (whichever occurs
      # first).
      if ( $end_bin != $start_bin
           && ( !defined( $bin_masks[$end_bin] )
                || ref( $bin_masks[$end_bin] ) ) )
      {
        my $bin_start = int( $end_bin*$bin_length );
        my $bin_end   = int( ( $end_bin + 1 )*$bin_length - 1 );

        for ( my $pos = $bin_start;
              $pos <= $feature_end && $pos <= $bin_end;
              ++$pos )
        {
          $bin_masks[$end_bin][ $pos - $bin_start ] = 1;
        }
      }
    } ## end elsif ( $method == 4 )
  } ## end for my $entry_index

  if ( $method == 4 ) {
    # For the 'coverage' method: turn each mask into the covered
    # fraction of its bin.
    for my $bin_index ( 0 .. $#bins ) {
      my $mask = $bin_masks[$bin_index];
      next if ( !defined($mask) );

      if ( !ref($mask) ) {
        # Scalar marker: the whole bin is covered.
        $bins[$bin_index] = 1;
      }
      else {
        $bins[$bin_index] =
          scalar( grep { defined($_) } @{$mask} )/$bin_length;
      }
    }
  }

  return \@bins;
}
## end sub get_bins
1
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment