SqlHealthcheck.pm 5.93 KB
Newer Older
1 2 3 4
=pod 

=head1 NAME

5
    Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck
6 7 8 9 10

=head1 SYNOPSIS

    standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck \
                    -db_conn mysql://ensro@compara1/mm14_compara_homology_71 \
11 12
                    -description 'We expect at least 20,000 human genes' \
                    -query 'SELECT * FROM member WHERE genome_db_id = 90 AND source_name = "ENSEMBLGENE"' \
13 14 15 16
                    -expected_size '>= 20000'

=head1 DESCRIPTION

17 18 19 20 21 22 23 24 25 26
    This is a generic RunnableDB module for testing the size of the resultset of any SQL query.

    The query is passed by the parameter 'inputquery' (param substituted)
    The expected size is passed by the parameter 'expected_size' as a string "CONDITION VALUE" (CONDITION defaults to equality, VALUE defaults to 0).
    Currently, CONDITION is one of: = == < <= > >= <> !=

    TODO: implement a "expected_value" test

=head1 LICENSE

27
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Matthieu Muffato's avatar
Matthieu Muffato committed
28
    Copyright [2016-2018] EMBL-European Bioinformatics Institute
29

30 31
    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
32

33 34 35 36 37 38 39 40
         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

41
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
42

43 44
=cut

45

46 47 48
package Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck;

use strict;
49
use warnings;
50 51 52 53

use base ('Bio::EnsEMBL::Hive::Process');


54 55
sub param_defaults {
    return {
56 57 58
        'description'   => '/no description/',
        'expected_size' => undef,
        'inputquery'    => undef,
59 60 61 62 63
    }
}



64 65 66 67 68 69 70 71 72
=head2 fetch_input

    Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
                  
=cut

sub fetch_input {
    my $self = shift @_;

73
    $self->param('inputquery') && warn "'inputquery' is deprecated in SqlHealthcheck. Use 'query' instead\n";
74
    
75 76 77 78 79 80 81 82
    my $test = {
        description => $self->param('description'),
        query => $self->param('inputquery') || $self->param_required('query'),
        expected_size => $self->param('expected_size'),
    };

    $self->param('tests', [$test]);
    $self->_validate_tests;
83 84 85
}


86
=head2 _validate_tests
87

88
    Description : Checks that the tests are properly defined, and parses the "expected_size"
89 90 91

=cut

92
sub _validate_tests {
93 94
    my $self = shift @_;

95 96 97
    foreach my $test (@{$self->param('tests')}) {
        die "The SQL query must be provided" unless $test->{query};
        die "The description must be provided" unless $test->{description};
98
        $test->{subst_query} = $self->param_substitute($test->{query});
99
        my $expected_size = $self->param_substitute($test->{expected_size} || '');
100 101 102 103 104 105 106
        unless ($expected_size =~ /^\s*(=|==|>|>=|<|<=|<>|!=|)\s*(\d*)\s*$/) {
            die "Cannot interpret the 'expected_size' parameter: '$expected_size'";
        }
        $test->{logical_test} = $1 || '=';
        $test->{reference_size} = $2 || '0';
    }
}
107

108
=head2 run
109

110 111
    Description : Implements run() interface method of Bio::EnsEMBL::Hive::Process
                  Iterate through the tests and run them all. Report the failed tests at the end
112

113
=cut
114

115 116
sub run {
    my $self = shift @_;
117

118 119 120
    my @failures = ();
    foreach my $test (@{$self->param('tests')}) {
        push @failures, $test unless $self->_run_test($test);
121
    }
122
    die "The following tests have failed:\n".join('', map {sprintf(" - %s\n   > %s\n", $_->{description}, $_->{subst_query})} @failures) if @failures;
123 124 125
}


126
=head2 _run_test
127

128 129
    Description : Runs a single test, defined in a hash with the following keys:
                   description, query, reference_size, logical_test
130 131 132

=cut

133 134 135 136 137
sub _run_test {
    my $self = shift @_;
    my $test = shift @_;

    my $description = $test->{description};
138
    my $query = $test->{subst_query};
139 140
    my $reference_size = $test->{reference_size};
    my $logical_test = $test->{logical_test};
141

142 143 144 145 146
    # Final semicolons are removed if present
    if ($query =~ /(;\s*$)/) {
        $query =~ s/$1//;
    }

147 148
    print "Test description: $description\n";
    print "Checking whether the number of rows $logical_test $reference_size\n";
149

150 151 152 153 154 155 156 157
    # This could benefit from 'switch' once we move to a more recent version of Perl
    my $maxrow = $reference_size;
    $maxrow++ if grep {$_ eq $logical_test} qw(= == > <= <> !=);

    $query .= " LIMIT $maxrow" unless $query =~ /LIMIT/i;
    print "Query: $query\n";

    my $sth = $self->data_dbc()->prepare($query);
158
    $sth->{mysql_use_result} = 1 if $self->data_dbc->driver eq 'mysql';
159 160 161
    $sth->execute();

    my $nrow = 0;
162
    while (defined $sth->fetchrow_arrayref()) {
163 164 165 166
        $nrow++;
    }
    $sth->finish;

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
    print "$nrow rows returned".($nrow == $maxrow ? " (test aborted, there could be more rows)" : "")."\n";

    # This could benefit from 'switch' once we move to a more recent version of Perl
    my $success = 0;
    if ($logical_test eq '=' or $logical_test eq '==') {
        $success = 1 if $nrow == $reference_size;

    } elsif ($logical_test eq '<' or $logical_test eq '<=') {
        $success = 1 if $nrow < $maxrow;

    } elsif ($logical_test eq '>' or $logical_test eq '>=') {
        $success = 1 if $nrow >= $maxrow;

   } elsif ($logical_test eq '<>' or $logical_test eq '!=') {
        $success = 1 if $nrow != $reference_size;

    } else {
        die "This should not happen. A logical test is not checked";
    }
    warn $success ? "Success\n\n" : "Failure\n\n";
    return $success;
188 189
}

190

191
1;