Commit 7ad2108f authored by Andy Yates's avatar Andy Yates
Browse files

[ENSCORESW-224]. Fix for multi-line SQ lines caused by large sequences being...

[ENSCORESW-224]. Fix for multi-line SQ lines caused by large sequences being pushed through SeqDumper. This was never an issue before because our slices were never big enough. Applied the same fix to Genbank, although it should be less of an issue there: because of its formatting, larger slices are needed to trigger the problem.
parent 7082a8df
......@@ -494,7 +494,7 @@ sub dump_embl {
my $value = "Sequence $length BP; $a_count A; $c_count C; " .
"$g_count G; $t_count T; $other_count other;";
$self->write($FH, $EMBL_HEADER, 'SQ', $value);
$self->print($FH, 'SQ '.$value."\n");
$self->write_embl_seq($FH, \$SEQ);
......@@ -649,7 +649,7 @@ sub dump_genbank {
$tag = 'BASE COUNT';
$value = "$a_count a $c_count c $g_count g $t_count t";
$value .= " $other_count n" if($other_count);
$self->write($FH, $GENBANK_HEADER, $tag, $value);
$self->print($FH, qq{$tag $value\n});
$self->print( $FH, "ORIGIN\n" );
$self->write_genbank_seq($FH, \$SEQ);
......
use strict;
use warnings;
use Test::More;
use IO::String;
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Utils::SeqDumper;
use Bio::EnsEMBL::Test::TestUtils;
use Bio::EnsEMBL::Test::MultiTestDB;
BEGIN { $| = 1;
use Test;
plan tests => 7;
}
our $verbose = 0;
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
......@@ -64,3 +62,44 @@ ok(1);
# Smoke test: ask the dumper for a FASTA dump of the slice, passing $file as
# the third argument (presumably the output destination -- confirm against
# SeqDumper::dump). The ok(1) below is reached only if dump() returned
# without dying; the output content itself is not inspected here.
$seq_dumper->dump($slice, 'FASTA', $file);
ok(1);
# Collect every line of a filehandle that begins with a given prefix.
#   $fh     - handle object providing setpos(); it is repositioned to 0
#             before reading so the whole content is scanned
#   $substr - prefix a line must start with to be kept
# Returns an array reference holding the matching lines (trailing newline
# removed) in the order they were read.
my $index_fh = sub {
    my ($fh, $substr) = @_;
    my @matches;
    $fh->setpos(0);
    while (defined(my $row = <$fh>)) {
        chomp $row;
        # index() == 0 means the prefix sits at the very start of the line.
        next if index($row, $substr) != 0;
        push @matches, $row;
    }
    return \@matches;
};
# Count the lines of a filehandle that begin with a given prefix.
#   $fh     - handle to scan (passed straight through to $index_fh)
#   $substr - prefix to look for at the start of each line
# Returns the number of lines $index_fh reports for that prefix.
my $index_count_fh = sub {
    my ($fh, $substr) = @_;
    my $matching_lines = $index_fh->($fh, $substr);
    return scalar @{$matching_lines};
};
# Regression test for ENSCORESW-224: pushing a very large sequence through
# the dumper must still produce exactly one EMBL SQ line and exactly one
# Genbank BASE COUNT line, each with the expected formatting.
{
    # Four equal runs of A, C, T and G: 4 * 1e7 = 40,000,000 bases in total.
    my $frag_size = 1e7;
    my $seq = 'A'x$frag_size.'C'x$frag_size.'T'x$frag_size.'G'x$frag_size;
    my $sd = Bio::EnsEMBL::Utils::SeqDumper->new();
    # Set every entry of the dumper's feature_types hash to 0 -- presumably
    # this switches feature dumping off so only header and sequence lines are
    # written; confirm against SeqDumper.
    $sd->{feature_types}->{$_} = 0 for keys %{$sd->{feature_types}};

    # EMBL: a single SQ summary line with the per-base counts.
    {
        my $fh = IO::String->new();
        $sd->dump_embl($slice, $fh, $seq);
        my $lines = $index_fh->($fh, 'SQ ');
        is(scalar(@{$lines}), 1, 'Expect only 1 EMBL SQ line describing a sequence');
        is($lines->[0], 'SQ Sequence 40000000 BP; 10000000 A; 10000000 C; 10000000 G; 10000000 T; 0 other;', 'Formatting of SQ as expected');
    }

    # Genbank: a single BASE COUNT summary line with the per-base counts.
    {
        my $fh = IO::String->new();
        $sd->dump_genbank($slice, $fh, $seq);
        my $lines = $index_fh->($fh, 'BASE COUNT');
        # Explicit scalar() matches the EMBL check above instead of relying on
        # the prototype of is() to impose scalar context on the array.
        is(scalar(@{$lines}), 1, 'Expect only 1 Genbank BASE COUNT line describing a sequence');
        is($lines->[0], 'BASE COUNT 10000000 a 10000000 c 10000000 g 10000000 t', 'Formatting of BASE COUNT as expected');
    }
}

# No fixed plan: let Test::More derive the test count from what actually ran.
done_testing();
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment