Skip to content
Snippets Groups Projects
Commit 2ffc9730 authored by Andy Yates's avatar Andy Yates
Browse files

2 spacing not 4 spacing

parent 7de46409
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
......@@ -28,25 +28,25 @@ Bio::EnsEMBL::Utils::IO::FASTASerializer
$serializer->chunk_factor(1000);
$serializer->line_width(60);
$serializer->print_Seq($slice);
$serializer = Bio::EnsEMBL::Utils::IO::FASTASerializer->new($filehandle,
  sub {
    my $slice = shift;
    return "Custom header";
  }
);
=head1 DESCRIPTION
Replacement for SeqDumper, making better use of shared code. Outputs FASTA
format with optional custom header and formatting parameters. Set line_width
and chunk_factor to dictate buffer size depending on application. A 60kb
buffer is used by default with a line width of 60 characters.
Custom headers are set by supplying an anonymous subroutine to new(). Custom
header code must accept a Slice or Bio::PrimarySeqI compliant object as
argument and return a string.
The custom header method can be overridden later through set_custom_header()
but this is not normally necessary.
......@@ -63,162 +63,162 @@ use base qw(Bio::EnsEMBL::Utils::IO::Serializer);
=head2 new

  Arg [1]    : Filehandle (optional)
  Arg [2]    : CODEREF subroutine for writing custom headers
  Arg [3]    : [optional] Chunking size (integer)
  Arg [4]    : [optional] Line width (integer)
  Example    : $dumper = Bio::EnsEMBL::Utils::IO::FASTASerializer->new($filehandle,$header_function,1000,60);
  Description: Constructor
               Allows the specification of a custom function for rendering
               header lines.
  Returntype : Bio::EnsEMBL::Utils::IO::FASTASerializer;
  Exceptions : none
  Caller     : general

=cut

# Constructor.
#
# Arguments (positional):
#   1. filehandle       - passed straight through to the parent constructor
#   2. header function  - optional CODE reference that receives the sequence
#                         object and returns the header text (without '>')
#   3. chunk factor     - optional; any false value falls back to 1000
#   4. line width       - optional; any false value falls back to 60
#
# Returns the new serializer object. Calls throw() when a header function is
# supplied that is not a CODE reference.
# NOTE(review): throw() and check_ref() are not defined in this block; they
# are presumably imported at the top of the file - confirm.
sub new {
  my $caller          = shift;
  my $class           = ref($caller) || $caller;
  my $filehandle      = shift;
  my $header_function = shift;
  my $chunk_factor    = shift;
  my $line_width      = shift;

  my $self = $class->SUPER::new($filehandle);
  $self->{'header_function'} = $header_function;
  $self->{'line_width'}      = ($line_width)   ? $line_width   : 60;
  $self->{'chunk_factor'}    = ($chunk_factor) ? $chunk_factor : 1000;
  # gives a 60kb buffer by default, increase for higher database and disk efficiency.

  # TODO: Check this error trap works as intended
  if ( defined($self->{'header_function'}) ) {
    if ( ref($self->{'header_function'}) ne "CODE" ) {
      throw("Custom header function must be an anonymous subroutine when instantiating FASTASerializer");
    }
  }
  else {
    # No custom header supplied: install the default header builder.
    $self->{'header_function'} = sub {
      my $slice = shift;

      if ( check_ref($slice, 'Bio::EnsEMBL::Slice') ) {
        my $id       = $slice->seq_region_name;
        my $seqtype  = 'dna';
        my $idtype   = $slice->coord_system->name;
        my $location = $slice->name;

        return "$id $seqtype:$idtype $location";
      }
      else {
        # must be a Bio::Seq , or we're doomed
        return $slice->display_id;
      }
    };
  }

  return $self;
}
=head2 print_metadata

  Arg [1]    : Bio::EnsEMBL::Slice
  Description: Printing header lines into FASTA files. Usually handled
               internally to the serializer.
  Returntype : None
  Caller     : print_Seq

=cut

# Writes one FASTA header line for the given sequence object.
#
# The header text is produced by the code reference returned from
# $self->header_function(), called with the sequence object as its only
# argument. The text is written to the serializer's filehandle prefixed
# with '>' and terminated with a newline.
sub print_metadata {
  my $self  = shift;
  my $slice = shift;

  my $fh       = $self->{'filehandle'};
  my $function = $self->header_function();
  my $metadata = $function->($slice);

  print {$fh} '>' . $metadata . "\n";
}
=head2 print_Seq

  Arg [1]    : Bio::EnsEMBL::Slice or other Bio::PrimarySeqI compliant object
  Description: Serializes the slice into FASTA format. Buffering is used.
               While other Bioperl PrimarySeqI implementations can be used,
               a custom header function will be required to accommodate it.
  Returntype : None

=cut

# Serializes a sequence object as one FASTA record.
#
# Writes the header via print_metadata(), then fetches the sequence through
# $slice->subseq() in chunks of chunk_factor * line_width bases, so the whole
# sequence is never requested in a single call. Each chunk is wrapped at
# line_width characters before being printed.
#
# Dies with "Error writing to file handle" if a print fails. Sets the
# 'achieved_something' flag on the serializer when the sequence is non-empty.
sub print_Seq {
  my $self  = shift;
  my $slice = shift;
  my $fh    = $self->{'filehandle'};

  $self->print_metadata($slice);

  my $width = $self->{line_width};

  # set buffer size
  my $chunk_size = $self->{'chunk_factor'} * $width;

  my $start = 1;
  my $end   = $slice->length();

  # chunk the sequence to conserve memory, and print
  my $here = $start;
  while ( $here <= $end ) {
    my $there = $here + $chunk_size - 1;
    $there = $end if ( $there > $end );

    my $seq = $slice->subseq($here, $there);
    # The chunk size is a multiple of the line width, so only the final
    # chunk can end in a short line.
    $seq =~ s/(.{1,$width})/$1\n/g;
    print {$fh} $seq or die "Error writing to file handle";

    $here = $there + 1;
  }

  if ( $end > 0 ) { $self->{'achieved_something'} = 1; }
}
=head2 line_width

  Arg [1]    : Integer e.g. 60 or 80
  Description: Set and get FASTA format line width. Default is 60
  Returntype : Integer

=cut

# Getter/setter for the FASTA line width.
#
# When called with a true value, stores it as the new line width. A false
# argument (undef, 0, '') leaves the current setting untouched. Always
# returns the line width currently in effect.
sub line_width {
  my $self       = shift;
  my $line_width = shift;

  if ($line_width) { $self->{'line_width'} = $line_width; }

  return $self->{'line_width'};
}
=head2 chunk_factor

  Arg [1]    : Integer e.g. 1000
  Description: Set and get the multiplier used to dictate buffer size
               Chunk factor x line width = buffer size in bases.
  Returntype : Integer

=cut

# Getter/setter for the chunk factor, the multiplier applied to the line
# width to obtain the buffer size in bases.
#
# When called with a true value, stores it as the new chunk factor. A false
# argument (undef, 0, '') leaves the current setting untouched. Always
# returns the chunk factor currently in effect.
sub chunk_factor {
  my $self         = shift;
  my $chunk_factor = shift;

  if ($chunk_factor) { $self->{'chunk_factor'} = $chunk_factor; }

  return $self->{'chunk_factor'};
}
=head2 set_custom_header

  Arg [1]    : CODE reference
  Description: Set the custom header function. Normally this is done at
               construction time, but can be overridden here.
  Example    : $serializer->set_custom_header( sub { return 'New header'});
  Returntype :

=cut

sub set_custom_header {
......@@ -229,11 +229,11 @@ sub set_custom_header {
=head2 header_function

  Arg [1]    : CODE reference (optional)
  Description: Getter/setter for the custom header code
  Example    : $serializer->header_function( sub { return 'New header'});
  Returntype : CODE

=cut

sub header_function {
......@@ -245,4 +245,4 @@ sub header_function {
return $self->{header_function};
}
1;
\ No newline at end of file
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment