Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
d28c4ba9
Commit
d28c4ba9
authored
13 years ago
by
Andy Yates
Browse files
Options
Downloads
Patches
Plain Diff
Optional gzip support
parent
314c24a6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/Bio/EnsEMBL/Utils/IO.pm
+120
-4
120 additions, 4 deletions
modules/Bio/EnsEMBL/Utils/IO.pm
modules/t/utilsIo.t
+59
-30
59 additions, 30 deletions
modules/t/utilsIo.t
with
179 additions
and
34 deletions
modules/Bio/EnsEMBL/Utils/IO.pm
+
120
−
4
View file @
d28c4ba9
...
...
@@ -34,6 +34,7 @@ Bio::EnsEMBL::Utils::IO
#or
# use Bio::EnsEMBL::Utils::IO qw/:slurp/; #brings in any method starting with slurp
# use Bio::EnsEMBL::Utils::IO qw/:array/; #brings in any method which ends with _array
# use Bio::EnsEMBL::Utils::IO qw/:gz/; #brings all methods which start with gz_
# use Bio::EnsEMBL::Utils::IO qw/:all/; #brings all methods in
#As a scalar
...
...
@@ -62,6 +63,13 @@ Bio::EnsEMBL::Utils::IO
print $fh $$file_contents_ref;
return;
});
#Gzipping the data to another file
gz_work_with_file('/my/file.gz', 'w', sub {
my ($fh) = @_;
print $fh $$file_contents_ref;
return;
});
=head1 DESCRIPTION
...
...
@@ -86,15 +94,22 @@ use warnings;
use
base
qw(Exporter)
;
# Flipped to 1 further down only when both IO::Compress::Gzip and
# IO::Uncompress::Gunzip could be loaded; gz_work_with_file() throws while
# this is still 0.
our $GZIP_OK = 0;

# Every routine that can be imported on request (nothing is exported by default).
our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file/;

# Import groups:
#   :all   - everything in @EXPORT_OK
#   :slurp - routines which read a whole file in one go
#   :array - routines which return an ArrayRef
#   :gz    - the gzip-aware variants
our %EXPORT_TAGS = (
  all   => [@EXPORT_OK],
  slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/],
  array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/],
  gz    => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/],
);
use
Bio::EnsEMBL::Utils::
Exception
qw(throw)
;
use
Bio::EnsEMBL::Utils::
Scalar
qw(:assert)
;
use
IO::
File
;
# Optional dependency probe. Gzip support is switched on only when both
# compression modules load; a failed require is swallowed on purpose so that
# $GZIP_OK keeps whatever value it already had.
$GZIP_OK = 1 if eval {
  require IO::Compress::Gzip;
  require IO::Uncompress::Gunzip;
  1;
};
=head2 slurp()
...
...
@@ -127,6 +142,37 @@ sub slurp {
return
(
$want_ref
)
?
\
$contents
:
$contents
;
}
=head2 gz_slurp()
Arg [1] : string $file
Arg [2] : boolean; $want_ref
Indicates if we want to return a scalar reference
Arg [3] : boolean; $binary
Indicates if the file handle should be switched to binary mode
Description : Forces the decompressed contents of a gzipped file into a
scalar. This is the fastest way to get a file into memory in
Perl. You can also ask for a scalar reference back to avoid
copying the file contents. If the input file is binary then
specify this with the binary flag
Returntype : Scalar or reference of the file contents depending on arg 2
Example : my $contents = gz_slurp('/tmp/file.txt.gz');
Exceptions : If the file did not exist or was not readable
Status : Stable
=cut
sub gz_slurp {
  my ($file, $want_ref, $binary) = @_;
  my $contents;

  # Reader handed to gz_work_with_file(); it receives the open handle and
  # pulls everything from it in a single read.
  my $read_everything = sub {
    my ($fh) = @_;
    binmode($fh) if $binary;
    # With the input record separator undefined one readline returns all data
    local $/ = undef;
    $contents = <$fh>;
    return;
  };
  gz_work_with_file($file, 'r', $read_everything);

  # Hand back a reference when asked so the caller avoids a copy
  return \$contents if $want_ref;
  return $contents;
}
=head2 slurp_to_array()
Arg [1] : string $file
...
...
@@ -150,6 +196,29 @@ sub slurp_to_array {
return
$contents
;
}
=head2 gz_slurp_to_array()
Arg [1] : string $file
Arg [2] : boolean $chomp
Description : Sends the contents of the given gzipped file into an ArrayRef
Returntype : ArrayRef
Example : my $contents_array = gz_slurp_to_array('/tmp/file.txt.gz');
Exceptions : If the file did not exist or was not readable
Status : Stable
=cut
sub gz_slurp_to_array {
  my ($file, $chomp) = @_;
  my $lines;

  # Delegate the line splitting (and optional chomping) to fh_to_array(),
  # feeding it the handle gz_work_with_file() opens for us.
  my $collect_lines = sub {
    my ($gz_fh) = @_;
    $lines = fh_to_array($gz_fh, $chomp);
    return;
  };
  gz_work_with_file($file, 'r', $collect_lines);

  return $lines;
}
=head2 fh_to_array()
Arg [1] : Glob/IO::Handle $fh
...
...
@@ -225,7 +294,8 @@ sub process_to_array {
sub
work_with_file
{
my
(
$file
,
$mode
,
$callback
)
=
@_
;
throw
"
We need a mode to open the requested file with
"
if
!
$file
;
throw
"
We need a file name to open
"
if
!
$file
;
throw
"
We need a mode to open the requested file with
"
if
!
$mode
;
assert_ref
(
$callback
,
'
CODE
',
'
callback
');
my
$fh
=
IO::
File
->
new
(
$file
,
$mode
)
or
throw
"
Cannot open '
${file}
' in mode '
${mode}
': $!
";
...
...
@@ -234,4 +304,50 @@ sub work_with_file {
return
;
}
=head2 gz_work_with_file()
Arg [1] : string $file
Arg [2] : string; $mode
Supports the modes C<r>, C<w>, C<< > >>, C<<< >> >>> (append) and C<< < >>
Arg [3] : CodeRef the callback which is given the open file handle as
its only argument
Description : Performs the nitty gritty of checking if a file handle is open
and closing the resulting filehandle down.
Returntype : None
Example : gz_work_with_file('/tmp/out.txt.gz', 'w', sub {
my ($fh) = @_;
print $fh 'hello';
return;
});
Exceptions : If IO::Compress was not available, if the mode was not
recognised or if we could not work with the file due to permissions
Status : Stable
=cut
# Opens $file through the gzip layer, passes the handle to $callback and
# closes it afterwards.
#
#   $file     - path of the gzipped file to read or write
#   $mode     - 'w' or anything ending in '>' opens for writing (ending in
#               '>>' appends); 'r' or '<' opens for reading
#   $callback - CodeRef invoked with the open handle as its only argument
#
# Returns nothing. Throws if the gzip modules could not be loaded, if an
# argument is missing, if the mode is not recognised, or if the file cannot
# be opened or closed.
sub gz_work_with_file {
  my ($file, $mode, $callback) = @_;
  throw "IO::Compress was not available" if ! $GZIP_OK;
  throw "We need a file name to open" if ! $file;
  throw "We need a mode to open the requested file with" if ! $mode;
  assert_ref($callback, 'CODE', 'callback');

  my $fh;
  {
    # The *Error package variables are mentioned only once in this file
    no warnings qw/once/;
    if($mode =~ />$/ || $mode eq 'w') {
      my $append = ($mode =~ />>$/) ? 1 : 0;
      $fh = IO::Compress::Gzip->new($file, Append => $append)
        or throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError";
    }
    elsif($mode eq '<' || $mode eq 'r') {
      $fh = IO::Uncompress::Gunzip->new($file)
        or throw "Cannot open '$file' for reading: $IO::Uncompress::Gunzip::GunzipError";
    }
    else {
      throw "Could not decipher a mode from '$mode'";
    }
  }

  $callback->($fh);
  close($fh) or throw "Cannot close FH from ${file}: $!";
  return;
}
1
;
This diff is collapsed.
Click to expand it.
modules/t/utilsIo.t
+
59
−
30
View file @
d28c4ba9
...
...
@@ -3,10 +3,14 @@ use warnings;
use
Test::
More
;
use
Test::
Exception
;
use
File::
Temp
qw/tempfile/
;
use
Bio::EnsEMBL::Utils::
IO
qw/:all/
;
# Ask File::Temp for a unique path instead of building a predictable name
# under /tmp from $ENV{USER}. The handle is closed and the file removed
# straight away because the tests below start from a file that does not exist.
my ($tmp_fh, $file) = tempfile();
close($tmp_fh);
unlink $file;
my
$contents
=
<<'EOF';
>X
AAAAGGGTTCCC
...
...
@@ -14,36 +18,61 @@ TTGGCCAAAAAA
ATTC
EOF
# Declared at file scope because the gzip tests further down compare against it too.
my $expected_array = [qw/>X AAAAGGGTTCCC TTGGCCAAAAAA ATTC/];

# Round trip through the uncompressed routines. Kept in its own scope so the
# lexicals used here cannot leak into later test groups.
{
  throws_ok { slurp($file) } qr/No such file/, 'File does not currently exist so die';

  work_with_file($file, 'w', sub {
    my ($fh) = @_;
    print $fh $contents;
    return;
  });

  my $written_contents = slurp($file);
  is($contents, $written_contents, 'Contents should be the same');

  my $written_contents_ref = slurp($file, 1);
  is('SCALAR', ref($written_contents_ref), 'Asked for a ref so expect one back');
  is($contents, $$written_contents_ref, 'Contents should be the same');

  work_with_file($file, 'r', sub {
    my ($fh) = @_;
    my $line = <$fh>;
    chomp($line);
    is($line, '>X', 'First line expected to be FASTA header');
  });

  my $chomp = 1;
  is_deeply(slurp_to_array($file, $chomp), $expected_array, 'Checking slurp to array with chomp');
  $chomp = 0;
  is_deeply(slurp_to_array($file, $chomp), [ map { $_."\n" } @{$expected_array} ], 'Checking slurp to array without chomp');

  # Clean up and confirm the file really has gone
  unlink $file;
  dies_ok { slurp($file) } 'File no longer exists so die';
}
# Round trip through the gzip-aware routines. The compression modules are an
# optional dependency of Bio::EnsEMBL::Utils::IO, so skip rather than fail
# when the module reports they could not be loaded.
SKIP: {
  skip 'IO::Compress::Gzip/IO::Uncompress::Gunzip not available', 5
    unless $Bio::EnsEMBL::Utils::IO::GZIP_OK;

  gz_work_with_file($file, 'w', sub {
    my ($fh) = @_;
    print $fh $contents;
    return;
  });

  my $written_contents = gz_slurp($file);
  is($contents, $written_contents, 'Gzipped Contents should be the same');

  # Reading the raw bytes must give the compressed stream, not the text
  my $non_gz_written_contents = slurp($file);
  isnt($contents, $non_gz_written_contents, 'Reading normally should not return the same contents');

  my $chomp = 1;
  is_deeply(gz_slurp_to_array($file, $chomp), $expected_array, 'Checking slurp to array with chomp');
  $chomp = 0;
  is_deeply(gz_slurp_to_array($file, $chomp), [ map { $_."\n" } @{$expected_array} ], 'Checking slurp to array without chomp');

  unlink $file;
  dies_ok { slurp($file) } 'File no longer exists so die';
}
done_testing
();
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment