Skip to content
Snippets Groups Projects
Commit d28c4ba9 authored by Andy Yates's avatar Andy Yates
Browse files

Optional gzip support

parent 314c24a6
No related branches found
No related tags found
No related merge requests found
......@@ -34,6 +34,7 @@ Bio::EnsEMBL::Utils::IO
#or
# use Bio::EnsEMBL::Utils::IO qw/:slurp/; #brings in any method starting with slurp
# use Bio::EnsEMBL::Utils::IO qw/:array/; #brings in any method which ends with _array
# use Bio::EnsEMBL::Utils::IO qw/:gz/; #brings all methods which start with gz_
# use Bio::EnsEMBL::Utils::IO qw/:all/; #brings all methods in
#As a scalar
......@@ -62,6 +63,13 @@ Bio::EnsEMBL::Utils::IO
print $fh $$file_contents_ref;
return;
});
#Gzipping the data to another file
gz_work_with_file('/my/file.gz', 'w', sub {
my ($fh) = @_;
print $fh $$file_contents_ref;
return;
});
=head1 DESCRIPTION
......@@ -86,15 +94,22 @@ use warnings;
use base qw(Exporter);

# Flipped to 1 by the eval block further down when the optional gzip modules
# (IO::Compress::Gzip and IO::Uncompress::Gunzip) can be loaded.
our $GZIP_OK = 0;

# Every function that can be imported on request; nothing is exported by default.
our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file/;

# Import groups, e.g. use Bio::EnsEMBL::Utils::IO qw/:gz/;
our %EXPORT_TAGS = (
  all   => [@EXPORT_OK],
  slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/],
  array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/],
  gz    => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/]
);
use Bio::EnsEMBL::Utils::Exception qw(throw);
use Bio::EnsEMBL::Utils::Scalar qw(:assert);
use IO::File;
eval {
require IO::Compress::Gzip;
require IO::Uncompress::Gunzip;
$GZIP_OK = 1;
};
=head2 slurp()
......@@ -127,6 +142,37 @@ sub slurp {
return ($want_ref) ? \$contents : $contents;
}
=head2 gz_slurp()
Arg [1] : string $file
Arg [2] : boolean; $want_ref
Indicates if we want to return a scalar reference
Arg [3] : boolean; $binary
Indicates if binmode should be applied to the file handle before reading
Description : Forces the contents of a gzipped file into a scalar. This is the
fastest way to get a file into memory in Perl. You can also
get a scalar reference back to avoid copying the file contents.
If the input file is binary then specify this
with the binary flag
Returntype : Scalar or reference of the file contents depending on arg 2
Example : my $contents = gz_slurp('/tmp/file.txt.gz');
Exceptions : If the file did not exist or was not readable
Status : Stable
=cut
sub gz_slurp {
  my ($file, $want_ref, $binary) = @_;

  # Filled in by the reader callback once the gzipped file has been opened
  my $data;
  my $reader = sub {
    my ($gz_fh) = @_;
    binmode($gz_fh) if $binary;
    # No record separator: a single read returns everything the handle yields
    local $/;
    $data = <$gz_fh>;
    return;
  };
  gz_work_with_file($file, 'r', $reader);

  # Hand back a reference when asked, otherwise the scalar itself
  return \$data if $want_ref;
  return $data;
}
=head2 slurp_to_array()
Arg [1] : string $file
......@@ -150,6 +196,29 @@ sub slurp_to_array {
return $contents;
}
=head2 gz_slurp_to_array()
Arg [1] : string $file
Arg [2] : boolean $chomp
Description : Sends the contents of the given gzipped file into an ArrayRef
Returntype : ArrayRef
Example : my $contents_array = gz_slurp_to_array('/tmp/file.txt.gz');
Exceptions : If the file did not exist or was not readable
Status : Stable
=cut
sub gz_slurp_to_array {
  my ($file, $chomp) = @_;

  # Populated by the callback with whatever fh_to_array() produces
  my $lines;
  my $collector = sub {
    my ($gz_fh) = @_;
    $lines = fh_to_array($gz_fh, $chomp);
    return;
  };
  gz_work_with_file($file, 'r', $collector);

  return $lines;
}
=head2 fh_to_array()
Arg [1] : Glob/IO::Handle $fh
......@@ -225,7 +294,8 @@ sub process_to_array {
sub work_with_file {
my ($file, $mode, $callback) = @_;
throw "We need a mode to open the requested file with" if ! $file;
throw "We need a file name to open" if ! $file;
throw "We need a mode to open the requested file with" if ! $mode;
assert_ref($callback, 'CODE', 'callback');
my $fh = IO::File->new($file, $mode) or
throw "Cannot open '${file}' in mode '${mode}': $!";
......@@ -234,4 +304,50 @@ sub work_with_file {
return;
}
=head2 gz_work_with_file()
Arg [1] : string $file
Arg [2] : string; $mode
Supports modes C<r>, C<w>, C<< > >>, C<<< >> >>> (append) and C<< < >>
Arg [3] : CodeRef the callback which is given the open file handle as
its only argument
Description : Performs the nitty gritty of checking if a file handle is open
and closing the resulting filehandle down.
Returntype : None
Example : gz_work_with_file('/tmp/out.txt.gz', 'w', sub {
my ($fh) = @_;
print $fh 'hello';
return;
});
Exceptions : If the gzip libraries are not available, if the mode is not
recognised or if the file could not be opened or closed
Status : Stable
=cut
sub gz_work_with_file {
  my ($file, $mode, $callback) = @_;

  # Gzip support is optional; $GZIP_OK is only true when both IO::Compress
  # modules were successfully required when this module was loaded.
  throw "IO::Compress was not available" if ! $GZIP_OK;
  throw "We need a file name to open" if ! $file;
  throw "We need a mode to open the requested file with" if ! $mode;
  assert_ref($callback, 'CODE', 'callback');

  my $fh;
  {
    # Silences "used only once" warnings for the error variables named below
    no warnings qw/once/;
    if($mode =~ />$/ || $mode eq 'w') {
      # Any mode ending in '>' is a write; '>>' additionally requests append
      my $append = ($mode =~ />>$/) ? 1 : 0;
      $fh = IO::Compress::Gzip->new($file, Append => $append)
        or throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError";
    }
    elsif($mode eq '<' || $mode eq 'r') {
      $fh = IO::Uncompress::Gunzip->new($file)
        or throw "Cannot open '$file' for reading: $IO::Uncompress::Gunzip::GunzipError";
    }
    else {
      throw "Could not decipher a mode from '$mode'";
    }
  }

  # The callback receives the (de)compressing handle as its only argument
  $callback->($fh);
  close($fh) or throw "Cannot close FH from ${file}: $!";
  return;
}
1;
......@@ -3,10 +3,14 @@ use warnings;
use Test::More;
use Test::Exception;
use File::Temp qw/tempfile/;
use Bio::EnsEMBL::Utils::IO qw/:all/;
my $file = '/tmp/'.$ENV{USER}.'utilsIo.txt';
my ($tmp_fh, $file) = tempfile();
close($tmp_fh);
unlink $file;
my $contents = <<'EOF';
>X
AAAAGGGTTCCC
......@@ -14,36 +18,61 @@ TTGGCCAAAAAA
ATTC
EOF
throws_ok { slurp($file) } qr/No such file/, 'File does not currently exist so die';
work_with_file($file, 'w', sub {
my ($fh) = @_;
print $fh $contents;
return;
});
my $written_contents = slurp($file);
is($contents, $written_contents, 'Contents should be the same');
my $written_contents_ref = slurp($file, 1);
is('SCALAR', ref($written_contents_ref), 'Asked for a ref so expect one back');
is($contents, $$written_contents_ref, 'Contents should be the same');
work_with_file($file, 'r', sub {
my ($fh) = @_;
my $line = <$fh>;
chomp($line);
is($line, '>X', 'First line expected to be FASTA header');
});
my $expected_array = [qw/>X AAAAGGGTTCCC TTGGCCAAAAAA ATTC/];
my $chomp = 1;
is_deeply(slurp_to_array($file, $chomp), $expected_array, 'Checking slurp to array with chomp');
$chomp = 0;
is_deeply(slurp_to_array($file, $chomp), [ map { "${_}\n" } @{$expected_array} ], 'Checking slurp to array with chomp');
unlink $file;
# Round trip through an uncompressed file: write, slurp back, read line-wise
{
  throws_ok { slurp($file) } qr/No such file/, 'File does not currently exist so die';

  work_with_file($file, 'w', sub {
    my ($fh) = @_;
    print $fh $contents;
    return;
  });

  my $written_contents = slurp($file);
  is($contents, $written_contents, 'Contents should be the same');

  my $written_contents_ref = slurp($file, 1);
  is('SCALAR', ref($written_contents_ref), 'Asked for a ref so expect one back');
  is($contents, $$written_contents_ref, 'Contents should be the same');

  work_with_file($file, 'r', sub {
    my ($fh) = @_;
    my $line = <$fh>;
    chomp($line);
    is($line, '>X', 'First line expected to be FASTA header');
  });

  my $chomp = 1;
  is_deeply(slurp_to_array($file, $chomp), $expected_array, 'Checking slurp to array with chomp');
  # With chomp switched off every element keeps its trailing newline
  $chomp = 0;
  is_deeply(slurp_to_array($file, $chomp), [ map { $_."\n" } @{$expected_array}], 'Checking slurp to array without chomp');

  unlink $file;

  dies_ok { slurp($file) } 'File no longer exists so die';
}
dies_ok { slurp($file) } 'File no longer exists so die';
# Round trip through a gzipped file. Gzip support is optional in
# Bio::EnsEMBL::Utils::IO, so skip these tests rather than die when the
# IO::Compress modules could not be loaded.
SKIP: {
  skip 'IO::Compress::Gzip and IO::Uncompress::Gunzip are not available', 5
    unless $Bio::EnsEMBL::Utils::IO::GZIP_OK;

  gz_work_with_file($file, 'w', sub {
    my ($fh) = @_;
    print $fh $contents;
    return;
  });

  my $written_contents = gz_slurp($file);
  is($contents, $written_contents, 'Gzipped Contents should be the same');

  # A plain read returns the compressed bytes, so it must differ
  my $non_gz_written_contents = slurp($file);
  isnt($contents, $non_gz_written_contents, 'Reading normally should not return the same contents');

  my $chomp = 1;
  is_deeply(gz_slurp_to_array($file, $chomp), $expected_array, 'Checking slurp to array with chomp');
  # With chomp switched off every element keeps its trailing newline
  $chomp = 0;
  is_deeply(gz_slurp_to_array($file, $chomp), [ map { $_."\n" } @{$expected_array}], 'Checking slurp to array without chomp');

  unlink $file;

  dies_ok { slurp($file) } 'File no longer exists so die';
}
done_testing();
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment