AnalysisStatsAdaptor.pm 7.58 KB
Newer Older
1
=pod
2 3

=head1 NAME
4

5
    Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
6 7

=head1 SYNOPSIS
8

9 10
    $analysisStatsAdaptor = $db_adaptor->get_AnalysisStatsAdaptor;
    $analysisStatsAdaptor = $analysisStats->adaptor;
11 12

=head1 DESCRIPTION
13

14 15 16 17 18
    Module to encapsulate all db access for persistent class AnalysisStats.
    There should be just one per application and database connection.

=head1 LICENSE

19
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Brandon Walts's avatar
Brandon Walts committed
20
    Copyright [2016-2020] EMBL-European Bioinformatics Institute
21 22 23 24 25 26 27 28 29

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
30 31

=head1 CONTACT
32

33
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
34 35

=head1 APPENDIX
36

37 38
    The rest of the documentation details each of the object methods.
    Internal methods are usually preceded with a _
39 40 41 42 43 44 45

=cut


package Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor;

use strict;
46
use warnings;
47

48
use Bio::EnsEMBL::Hive::AnalysisStats;
49

50
use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
51 52


53 54 55
sub default_table_name {
    return 'analysis_stats';
}
56 57


58 59 60 61
sub default_input_column_mapping {
    my $self    = shift @_;
    my $driver  = $self->dbc->driver();
    return  {
62 63 64 65
        'when_updated' => {
                            'mysql'     => "UNIX_TIMESTAMP()-UNIX_TIMESTAMP(when_updated) seconds_since_when_updated ",
                            'sqlite'    => "strftime('%s','now')-strftime('%s',when_updated) seconds_since_when_updated ",
                            'pgsql'     => "EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - when_updated) seconds_since_when_updated ",
66 67 68
        }->{$driver},
    };
}
69

70

71
sub do_not_update_columns {
72
    return ['when_updated'];
73 74 75
}


76 77
sub object_class {
    return 'Bio::EnsEMBL::Hive::AnalysisStats';
78 79 80
}


81
################
82
#
83
# UPDATE METHODS
84 85 86
#
################

87

88
sub update_stats_and_monitor {
89 90
  my ($self, $stats) = @_;

91
  my $sql = "UPDATE analysis_stats SET status='".$stats->status."' ";
92 93

  if ($stats->behaviour eq "DYNAMIC") {
94

95
    my $hive_capacity = $stats->hive_capacity;
96 97 98 99
    my $max_hive_capacity = $stats->avg_input_msec_per_job
        ? int($stats->input_capacity * $stats->avg_msec_per_job / $stats->avg_input_msec_per_job)
        : $hive_capacity;

100 101 102 103 104 105
    if ($stats->avg_output_msec_per_job) {
      my $max_hive_capacity2 = int($stats->output_capacity * $stats->avg_msec_per_job / $stats->avg_output_msec_per_job);
      if ($max_hive_capacity2 < $max_hive_capacity) {
        $max_hive_capacity = $max_hive_capacity2;
      }
    }
106 107

    $stats->hive_capacity( int( ($hive_capacity+$max_hive_capacity+1)/2 ) );
108
    $sql .= ",hive_capacity=" . (defined($stats->hive_capacity()) ? $stats->hive_capacity() : 'NULL');
109 110
  }

111
  $sql .= ",avg_msec_per_job=" . $stats->avg_msec_per_job();
112 113 114
  $sql .= ",avg_input_msec_per_job=" . $stats->avg_input_msec_per_job();
  $sql .= ",avg_run_msec_per_job=" . $stats->avg_run_msec_per_job();
  $sql .= ",avg_output_msec_per_job=" . $stats->avg_output_msec_per_job();
115 116 117

  unless( $self->db->hive_use_triggers() ) {
      $sql .= ",total_job_count=" . $stats->total_job_count();
118 119
      $sql .= ",semaphored_job_count=" . $stats->semaphored_job_count();
      $sql .= ",ready_job_count=" . $stats->ready_job_count();
120 121 122
      $sql .= ",done_job_count=" . $stats->done_job_count();
      $sql .= ",failed_job_count=" . $stats->failed_job_count();

123
      $stats->num_running_workers( $self->db->get_RoleAdaptor->count_active_roles( $stats->analysis_id() ) );
124 125
      $sql .= ",num_running_workers=" . $stats->num_running_workers();
  }
126

127
  $sql .= ",when_updated=CURRENT_TIMESTAMP";
Leo Gordon's avatar
Leo Gordon committed
128
  $sql .= ",sync_lock='0'";
129 130 131 132 133
  $sql .= " WHERE analysis_id='".$stats->analysis_id."' ";

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
134
  $sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT CURRENT_TIMESTAMP, analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id);
135 136
  $sth->execute();
  $sth->finish;
137
  $stats->seconds_since_when_updated(0); #not exact but good enough :)
138 139 140
}


141
sub update_status {
142 143
  my ($self, $analysis_id, $status) = @_;

144
  my $sql = "UPDATE analysis_stats SET status='$status' WHERE analysis_id='$analysis_id' ";
145 146 147 148 149 150 151

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
}


152 153 154 155 156
=head2 interval_update_work_done

  Arg [1]     : int $analysis_id
  Arg [2]     : int $jobs_done_in_interval
  Arg [3]     : int $interval_msec
157 158 159 160 161
  Arg [4]     : int $fetching_msec
  Arg [5]     : int $running_msec
  Arg [6]     : int $writing_msec
  Arg [7]     : real $weight_factor [optional]
  Example     : $statsDBA->interval_update_work_done($analysis_id, $jobs_done, $interval_msec, $fetching_msec, $running_msec, $writing_msec);
162 163 164 165 166 167 168
  Description : does a database update to recalculate the avg_msec_per_job and done_job_count
                does an interval equation by multiplying out the previous done_job_count with the
                previous avg_msec_per_job and then expanding by new interval values to give a better average.
  Caller      : Bio::EnsEMBL::Hive::Worker

=cut

169 170
sub interval_update_work_done {
  my ($self, $analysis_id, $job_count, $interval_msec, $fetching_msec, $running_msec, $writing_msec, $weight_factor) = @_;
171 172

  $weight_factor ||= 3; # makes it more sensitive to the dynamics of the farm
173

174 175 176
  my $sql = $self->db->hive_use_triggers()
  ? qq{
    UPDATE analysis_stats SET
177 178 179 180
        avg_msec_per_job = ROUND(((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_input_msec_per_job = ROUND(((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_run_msec_per_job = ROUND(((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_output_msec_per_job = ROUND(((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count))
181 182 183
    WHERE analysis_id= $analysis_id
  }
  : qq{
184
    UPDATE analysis_stats SET
185 186 187 188 189
        avg_msec_per_job = ROUND(((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_input_msec_per_job = ROUND(((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_run_msec_per_job = ROUND(((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)),
        avg_output_msec_per_job = ROUND(((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count)),
        done_job_count = done_job_count + $job_count
190 191
    WHERE analysis_id= $analysis_id
  };
192

193
  $self->dbc->do( $sql );
194 195 196
}


197 198
sub increment_a_counter {
    my ($self, $counter, $increment, $analysis_id) = @_;
199

200 201 202 203 204
    unless( $self->db->hive_use_triggers() ) {
        if($increment) {    # can either be positive or negative
            $self->dbc->do( "UPDATE analysis_stats SET $counter = $counter + ($increment) WHERE analysis_id='$analysis_id'" );
        }
    }
205 206
}

207 208
1;