# Split the sequences read from the 'input_seqio' stream into a series of
# FASTA files named <output_prefix><chunk_number><output_suffix>, and call
# dataflow_output_id(..., 2) once per finished, non-empty chunk with its
# name, number, total sequence length and sequence count.
#
# Parameters (all read via $self->param):
#   input_seqio      - object providing next_seq() (sequence source)
#   max_chunk_length - upper bound on the summed sequence length per chunk
#   output_prefix    - leading part of every chunk file name
#   output_suffix    - trailing part of every chunk file name
#
# A chunk only exceeds max_chunk_length when a single sequence is longer
# than the limit by itself; such a sequence is written to a chunk of its own
# rather than being discarded.
my $self = shift @_;

my $input_seqio      = $self->param('input_seqio');
my $max_chunk_length = $self->param('max_chunk_length');
my $output_prefix    = $self->param('output_prefix');
my $output_suffix    = $self->param('output_suffix');

my $chunk_number = 1;   # counts the chunks
my $chunk_length = 0;   # total length of the current chunk
my $chunk_size   = 0;   # number of sequences in the current chunk

my $chunk_name  = $output_prefix.$chunk_number.$output_suffix;
my $chunk_seqio = Bio::SeqIO->new(-file => '>'.$chunk_name, -format => 'fasta');

# Finish the current chunk: close its file first, so that everything is on
# disk before the chunk is announced, then dataflow its description.
my $flush_chunk = sub {
    $chunk_seqio->close();
    $self->dataflow_output_id( {
        'chunk_name'   => $chunk_name,
        'chunk_number' => $chunk_number,
        'chunk_length' => $chunk_length,
        'chunk_size'   => $chunk_size
    }, 2);
};

while (my $seq_object = $input_seqio->next_seq) {
    my $seq_length = $seq_object->length();

    # Roll over to a new chunk when this sequence would overflow the current
    # one. An empty chunk is never rolled over: that would emit a chunk with
    # no sequences and still leave the oversized sequence without a home.
    if ($chunk_size and $chunk_length + $seq_length > $max_chunk_length) {
        $flush_chunk->();

        # start writing to the next one:
        $chunk_length = 0;
        $chunk_size   = 0;
        $chunk_number++;
        $chunk_name   = $output_prefix.$chunk_number.$output_suffix;
        $chunk_seqio  = Bio::SeqIO->new(-file => '>'.$chunk_name, -format => 'fasta');
    }

    # Every sequence is written exactly once, including the one that
    # triggered the rollover (previously it was silently dropped).
    $chunk_seqio->write_seq( $seq_object );
    $chunk_length += $seq_length;
    $chunk_size   += 1;
}

if ($chunk_size) {  # flush the last chunk:
    $flush_chunk->();
}
else {
    # No sequence was read at all: nothing to dataflow, just release the
    # (empty) file that was opened for the first chunk.
    $chunk_seqio->close();
}
}