AnalysisStatsAdaptor.pm 7.42 KB
Newer Older
1
=pod
2 3

=head1 NAME
4

5
    Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
6 7

=head1 SYNOPSIS
8

9 10
    $analysisStatsAdaptor = $db_adaptor->get_AnalysisStatsAdaptor;
    $analysisStatsAdaptor = $analysisStats->adaptor;
11 12

=head1 DESCRIPTION
13

14 15 16 17 18
    Module to encapsulate all db access for persistent class AnalysisStats.
    There should be just one per application and database connection.

=head1 LICENSE

19
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
nwillhoft's avatar
nwillhoft committed
20
    Copyright [2016-2021] EMBL-European Bioinformatics Institute
21 22 23 24 25 26 27 28 29

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
30 31

=head1 CONTACT
32

33
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
34 35

=head1 APPENDIX
36

37 38
    The rest of the documentation details each of the object methods.
    Internal methods are usually preceded with a _
39 40 41 42 43 44 45

=cut


package Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor;

use strict;
46
use warnings;
47

48
use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
49 50


51 52 53
sub default_table_name {
    return 'analysis_stats';
}
54 55


56 57 58 59
sub default_input_column_mapping {
    my $self    = shift @_;
    my $driver  = $self->dbc->driver();
    return  {
60 61 62 63
        'when_updated' => {
                            'mysql'     => "UNIX_TIMESTAMP()-UNIX_TIMESTAMP(when_updated) seconds_since_when_updated ",
                            'sqlite'    => "strftime('%s','now')-strftime('%s',when_updated) seconds_since_when_updated ",
                            'pgsql'     => "EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - when_updated) seconds_since_when_updated ",
64 65 66
        }->{$driver},
    };
}
67

68

69
sub do_not_update_columns {
70
    return ['when_updated'];
71 72 73
}


74 75
sub object_class {
    return 'Bio::EnsEMBL::Hive::AnalysisStats';
76 77 78
}


79 80 81 82 83 84 85 86 87 88 89
sub objectify {     # turn the hashref into an object
    my ($self, $hashref) = @_;

    my $object = $self->SUPER::objectify( $hashref );

    $object->seconds_since_last_fetch(0);

    return $object;
}


90
################
91
#
92
# UPDATE METHODS
93 94 95
#
################

96

97
sub update_stats_and_monitor {
98 99
  my ($self, $stats) = @_;

100
  my $sql = "UPDATE analysis_stats SET status='".$stats->status."' ";
101

102
  $sql .= ",avg_msec_per_job=" . $stats->avg_msec_per_job();
103 104 105
  $sql .= ",avg_input_msec_per_job=" . $stats->avg_input_msec_per_job();
  $sql .= ",avg_run_msec_per_job=" . $stats->avg_run_msec_per_job();
  $sql .= ",avg_output_msec_per_job=" . $stats->avg_output_msec_per_job();
106
  $sql .= ",is_excluded=" . $stats->is_excluded();
107

108
  unless( $stats->hive_pipeline->hive_use_triggers() ) {
109
      $sql .= ",total_job_count=" . $stats->total_job_count();
110 111
      $sql .= ",semaphored_job_count=" . $stats->semaphored_job_count();
      $sql .= ",ready_job_count=" . $stats->ready_job_count();
112 113 114
      $sql .= ",done_job_count=" . $stats->done_job_count();
      $sql .= ",failed_job_count=" . $stats->failed_job_count();

115
      $stats->num_running_workers( $self->db->get_RoleAdaptor->count_active_roles( $stats->analysis_id() ) );
116 117
      $sql .= ",num_running_workers=" . $stats->num_running_workers();
  }
118

119
  $sql .= ",when_updated=CURRENT_TIMESTAMP";
Leo Gordon's avatar
Leo Gordon committed
120
  $sql .= ",sync_lock='0'";
121 122 123 124 125
  $sql .= " WHERE analysis_id='".$stats->analysis_id."' ";

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
126
  $sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT CURRENT_TIMESTAMP, analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id);
127 128
  $sth->execute();
  $sth->finish;
129
  $stats->seconds_since_when_updated(0); #not exact but good enough :)
130 131 132
}


133
sub update_status {
134 135
  my ($self, $analysis_id, $status) = @_;

136
  my $sql = "UPDATE analysis_stats SET status='$status' WHERE analysis_id='$analysis_id' ";
137 138 139 140 141 142 143

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
}


144 145 146 147 148
=head2 interval_update_work_done

  Arg [1]     : int $analysis_id
  Arg [2]     : int $jobs_done_in_interval
  Arg [3]     : int $interval_msec
149 150 151 152 153
  Arg [4]     : int $fetching_msec
  Arg [5]     : int $running_msec
  Arg [6]     : int $writing_msec
  Arg [7]     : real $weight_factor [optional]
  Example     : $statsDBA->interval_update_work_done($analysis_id, $jobs_done, $interval_msec, $fetching_msec, $running_msec, $writing_msec);
154 155 156 157 158 159 160
  Description : does a database update to recalculate the avg_msec_per_job and done_job_count
                does an interval equation by multiplying out the previous done_job_count with the
                previous avg_msec_per_job and then expanding by new interval values to give a better average.
  Caller      : Bio::EnsEMBL::Hive::Worker

=cut

161 162
sub interval_update_work_done {
  my ($self, $analysis_id, $job_count, $interval_msec, $fetching_msec, $running_msec, $writing_msec, $weight_factor) = @_;
163 164

  $weight_factor ||= 3; # makes it more sensitive to the dynamics of the farm
165

166
  my $sql = $self->db->hive_pipeline->hive_use_triggers()
167 168
  ? qq{
    UPDATE analysis_stats SET
169 170 171 172
        avg_msec_per_job = ROUND(((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_input_msec_per_job = ROUND(((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_run_msec_per_job = ROUND(((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_output_msec_per_job = ROUND(((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count))
173 174 175
    WHERE analysis_id= $analysis_id
  }
  : qq{
176
    UPDATE analysis_stats SET
177 178 179 180 181
        avg_msec_per_job = ROUND(((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_input_msec_per_job = ROUND(((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_run_msec_per_job = ROUND(((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_output_msec_per_job = ROUND(((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count)),
        done_job_count = done_job_count + $job_count
182 183
    WHERE analysis_id= $analysis_id
  };
184

185
  $self->dbc->do( $sql );
186 187 188
}


189 190
sub increment_a_counter {
    my ($self, $counter, $increment, $analysis_id) = @_;
191

192
    unless( $self->db->hive_pipeline->hive_use_triggers() ) {
193
        if($increment) {    # can either be positive or negative
194 195 196
## ToDo: does it make sense to update the timestamp as well, to signal to the sync-allowed workers that they should wait?
#            $self->dbc->do( "UPDATE analysis_stats SET $counter = $counter + ($increment), when_updated=CURRENT_TIMESTAMP WHERE sync_lock=0 AND analysis_id='$analysis_id'" );
            $self->dbc->do( "UPDATE analysis_stats SET $counter = $counter + ($increment) WHERE sync_lock=0 AND analysis_id='$analysis_id'" );
197 198
        }
    }
199 200
}

201 202
1;