fastafactory.t 3.39 KB
Newer Older
1 2
#!/usr/bin/env perl

3
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 5 6 7 8 9 10 11 12 13 14 15 16
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Roy Storey's avatar
Roy Storey committed
17 18 19 20

use strict;
use warnings;

21 22
use Cwd;
use File::Basename;
Roy Storey's avatar
Roy Storey committed
23 24 25
use Test::More;
use Data::Dumper;
use File::Temp qw{tempdir};
26 27

use Bio::EnsEMBL::Hive::Utils::Test qw(standaloneJob);
Roy Storey's avatar
Roy Storey committed
28

29 30
# Locate the Fasta fixture: it is expected to be called 'input_fasta.fa' and
# to live in the parent of the directory that holds this test script.
my $inputfile = do {
    my $script_path = Cwd::realpath($0);
    my $fixture_dir = dirname( dirname($script_path) );
    $fixture_dir . '/input_fasta.fa';
};
31

Roy Storey's avatar
Roy Storey committed
32 33 34
# Run everything inside a scratch directory that File::Temp removes at exit
# (CLEANUP => 1), because the jobs below write their chunk files using
# relative paths ('./testN_').
my $dir = tempdir( CLEANUP => 1 );

# Stop immediately if we cannot enter the scratch directory: otherwise the
# output files would be created (and globbed for) in the original working
# directory.
chdir $dir
    or die "Cannot chdir to temporary directory '$dir': $!";

35 36 37
##
## Job 1: the chunk limit is larger than the whole input, so the checks that
## follow expect a single output file, './test1_1.fa'.
## The third argument lists the events the job is expected to emit.
##
standaloneJob(
    'Bio::EnsEMBL::Hive::RunnableDB::FastaFactory',
    {
        'inputfile'         => $inputfile,
        'max_chunk_length'  => 20000, ## big enough for all sequences
        'output_prefix'     => './test1_',
        'output_suffix'     => '.fa',
    },
    [
        [
            'DATAFLOW',
            {
                'chunk_number' => 1,
                'chunk_length' => 3360,
                'chunk_size' => 3,
                'chunk_name' => './test1_1.fa'
            },
            2   # NOTE(review): presumably the dataflow branch -- confirm against Hive::Utils::Test
        ]
    ],
);
Roy Storey's avatar
Roy Storey committed
56 57 58 59 60


##
## do some checks
##
Roy Storey's avatar
Roy Storey committed
61
# Job 1 used the prefix './test1_' and suffix '.fa'; its first (and only)
# chunk is looked up relative to the current working directory.
my $expected_filename = 'test1_1.fa';
ok(-e $expected_filename, 'output file exists - test 1');

# Element 7 of stat() is the file size in bytes. With a single chunk the
# output is expected to be exactly as large as the input file.
is((stat($expected_filename))[7], (stat($inputfile))[7], 'file size of input == output');

# Count the chunk files explicitly (array evaluated in scalar context).
my @all_files = glob('test1_*.fa');
is(scalar(@all_files), 1, 'exactly one output file - test 1');
Roy Storey's avatar
Roy Storey committed
68

69

Roy Storey's avatar
Roy Storey committed
70 71 72
## 
## next job
##
73 74 75 76 77 78
# Job 2: same input, but with a chunk limit of 200. No list of expected
# events is handed to standaloneJob() for this run; the results are checked
# through the files written with the './test2_' prefix.
my %small_chunk_params = (
    'inputfile'        => $inputfile,
    'max_chunk_length' => 200,          ## smaller than all sequences
    'output_prefix'    => './test2_',
    'output_suffix'    => '.fa',
);
standaloneJob( 'Bio::EnsEMBL::Hive::RunnableDB::FastaFactory', \%small_chunk_params );
Roy Storey's avatar
Roy Storey committed
79 80


Roy Storey's avatar
Roy Storey committed
81 82
# Job 2 must have written at least its first chunk file.
$expected_filename = 'test2_1.fa';
ok(-e $expected_filename, 'output file exists - test 2');

# With the 200 chunk limit the input is expected to be split into three
# chunk files (array evaluated in scalar context to get the count).
@all_files = glob('test2_*.fa');
is(scalar(@all_files), 3, 'correct number of output files - test 2');
# diag "@all_files";

# Expected properties of every chunk file, keyed by file name. The first
# (and only) element of each array is the expected file size in bytes.
# NOTE(review): the entries with size 0 presumably stand for chunk files
# that should never be produced -- confirm.
my $expected_properties = {
    'test2_1.fa' => [ 662 ],
    'test2_2.fa' => [ 1313 ],
    'test2_3.fa' => [ 1475 ],
    'test2_4.fa' => [ 0 ],

    'test3_1.fa' => [ 1975 ],
    'test3_2.fa' => [ 1475 ],
    'test3_3.fa' => [ 0 ],
    'test3_4.fa' => [ 0 ],
};

# Compare the size of each file found by the preceding glob() with the table.
foreach my $file (@all_files) {
    # Look the entry up without dereferencing it, so that an unknown file
    # name does not autovivify a new entry in the table.
    my $properties = $expected_properties->{$file};

    # A file the table does not know about is a failure in its own right,
    # rather than a comparison against an undefined size.
    if (!defined $properties) {
        fail("file '$file' is not a known output file");
        next;
    }

    my $exp_size = $properties->[0];
    is((stat($file))[7], $exp_size, "file '$file' has expected file size ($exp_size)");
}
Roy Storey's avatar
Roy Storey committed
104

Roy Storey's avatar
Roy Storey committed
105 106 107
## 
## next job
##
108 109 110 111 112 113
# Job 3: same input again, this time with a chunk limit of 1000. As for the
# previous job, no list of expected events is handed to standaloneJob(); the
# output goes to files with the './test3_' prefix.
my %medium_chunk_params = (
    'inputfile'        => $inputfile,
    'max_chunk_length' => 1000,         ## smaller than two combined sequences
    'output_prefix'    => './test3_',
    'output_suffix'    => '.fa',
);
standaloneJob( 'Bio::EnsEMBL::Hive::RunnableDB::FastaFactory', \%medium_chunk_params );
Roy Storey's avatar
Roy Storey committed
114 115 116 117 118 119 120 121 122 123 124 125

# Job 3 must have written at least its first chunk file.
$expected_filename = 'test3_1.fa';
ok(-e $expected_filename, 'output file exists - test 3');

# With the 1000 chunk limit two chunk files are expected (array evaluated in
# scalar context to get the count).
@all_files = glob('test3_*.fa');
is(scalar(@all_files), 2, 'correct number of output files - test 3');
# diag "@all_files";

# Compare the size (element 7 of stat(), in bytes) of each file found with
# the size recorded in $expected_properties.
foreach my $file (@all_files) {
    # Look the entry up without dereferencing it, so that an unknown file
    # name does not autovivify a new entry in the table.
    my $properties = $expected_properties->{$file};

    # A file the table does not know about is a failure in its own right,
    # rather than a comparison against an undefined size.
    if (!defined $properties) {
        fail("file '$file' is not a known output file");
        next;
    }

    my $exp_size = $properties->[0];
    is((stat($file))[7], $exp_size, "file '$file' has expected file size ($exp_size)");
}

# No fixed plan is declared; tell Test::More that all tests have run.
done_testing();