AnalysisStatsAdaptor.pm 9.97 KB
Newer Older
1
=pod
2 3

=head1 NAME
4

5
    Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
6 7

=head1 SYNOPSIS
8

9 10
    $analysisStatsAdaptor = $db_adaptor->get_AnalysisStatsAdaptor;
    $analysisStatsAdaptor = $analysisStats->adaptor;
11 12

=head1 DESCRIPTION
13

14 15 16 17 18
    Module to encapsulate all db access for persistent class AnalysisStats.
    There should be just one per application and database connection.

=head1 LICENSE

19
    Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
20 21 22 23 24 25 26 27 28

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
29 30

=head1 CONTACT
31

32
    Please contact ehive-users@ebi.ac.uk mailing list with questions/suggestions.
33 34

=head1 APPENDIX
35

36 37
    The rest of the documentation details each of the object methods.
    Internal methods are usually preceded with a _
38 39 40 41 42 43 44

=cut


package Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor;

use strict;
45

46
use Bio::EnsEMBL::Utils::Argument;
47 48
use Bio::EnsEMBL::Utils::Exception ('throw');
use Bio::EnsEMBL::Hive::AnalysisStats;
49

50
use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
51 52


53 54 55
sub default_table_name {
    return 'analysis_stats';
}
56 57


58 59 60 61 62 63 64 65 66 67 68
sub default_input_column_mapping {
    my $self    = shift @_;
    my $driver  = $self->dbc->driver();
    return  {
        'last_update' => {
                            'mysql'     => "UNIX_TIMESTAMP()-UNIX_TIMESTAMP(last_update) seconds_since_last_update ",
                            'sqlite'    => "strftime('%s','now')-strftime('%s',last_update) seconds_since_last_update ",
                            'pgsql'     => "EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - last_update) seconds_since_last_update ",
        }->{$driver},
    };
}
69

70

71 72
sub object_class {
    return 'Bio::EnsEMBL::Hive::AnalysisStats';
73 74 75
}


76 77
sub fetch_all_by_suitability_rc_id_meadow_type {
    my ($self, $resource_class_id, $meadow_type) = @_;
78

79 80 81
    my $join_and_filter_sql    = "JOIN analysis_base USING (analysis_id) WHERE "
                                .( $resource_class_id ? "resource_class_id=$resource_class_id AND " : '')
                                .( $meadow_type       ? "(meadow_type IS NULL OR meadow_type='$meadow_type') AND " : '');
Leo Gordon's avatar
Leo Gordon committed
82

83
        # the ones that clearly have work to do:
84 85
    my $primary_sql     = "num_required_workers>0 AND status in ('READY', 'WORKING') "
                         ."ORDER BY priority DESC, ".( ($self->dbc->driver eq 'mysql') ? 'RAND()' : 'RANDOM()' );
86 87

        # the ones that may have work to do after a sync:
88 89 90 91 92
    my $secondary_sql   = "status in ('LOADING', 'BLOCKED', 'ALL_CLAIMED', 'SYNCHING') "
                         ."ORDER BY last_update";   # FIXME: could mix in a.priority if sync is not too expensive?

    my $primary_results     = $self->fetch_all( $join_and_filter_sql . $primary_sql   );
    my $secondary_results   = $self->fetch_all( $join_and_filter_sql . $secondary_sql );
93 94

    return [ @$primary_results, @$secondary_results ];
95 96
}

97

98 99 100 101 102 103 104 105
=head2 refresh

  Arg [1]    : Bio::EnsEMBL::Hive::AnalysisStats object
  Description: reload the AnalysisStats object from the database
  Returntype : Bio::EnsEMBL::Hive::AnalysisStats object - same one with reloaded data

=cut

106
sub refresh {
107
    my ($self, $stats) = @_;
108

109
    my $new_stats = $self->fetch_by_analysis_id( $stats->analysis_id );     # fetch into a separate object
110

111
    %$stats = %$new_stats;                                                  # copy the data over
112

113
    return $stats;
114 115 116
}


117
################
118
#
119
# UPDATE METHODS
120 121 122
#
################

123

124
=head2 update
125

126 127 128 129 130 131
  Arg [1]    : Bio::EnsEMBL::Hive::AnalysisStats object
  Example    :
  Description:
  Returntype : Bio::EnsEMBL::Hive::Worker
  Exceptions :
  Caller     :
132

133 134 135 136 137
=cut

sub update {
  my ($self, $stats) = @_;

138 139 140
  my $hive_capacity = $stats->hive_capacity;

  if ($stats->behaviour eq "DYNAMIC") {
141 142 143 144 145

    my $max_hive_capacity = $stats->avg_input_msec_per_job
        ? int($stats->input_capacity * $stats->avg_msec_per_job / $stats->avg_input_msec_per_job)
        : $hive_capacity;

146 147 148 149 150 151
    if ($stats->avg_output_msec_per_job) {
      my $max_hive_capacity2 = int($stats->output_capacity * $stats->avg_msec_per_job / $stats->avg_output_msec_per_job);
      if ($max_hive_capacity2 < $max_hive_capacity) {
        $max_hive_capacity = $max_hive_capacity2;
      }
    }
152 153

    $stats->hive_capacity( int( ($hive_capacity+$max_hive_capacity+1)/2 ) );
154 155
  }

156
  my $sql = "UPDATE analysis_stats SET status='".$stats->status."' ";
157
  $sql .= ",batch_size=" . $stats->batch_size();
158
  $sql .= ",hive_capacity=" . (defined($stats->hive_capacity()) ? $stats->hive_capacity() : 'NULL');
159

160
  $sql .= ",avg_msec_per_job=" . $stats->avg_msec_per_job();
161 162 163
  $sql .= ",avg_input_msec_per_job=" . $stats->avg_input_msec_per_job();
  $sql .= ",avg_run_msec_per_job=" . $stats->avg_run_msec_per_job();
  $sql .= ",avg_output_msec_per_job=" . $stats->avg_output_msec_per_job();
164 165 166

  unless( $self->db->hive_use_triggers() ) {
      $sql .= ",total_job_count=" . $stats->total_job_count();
167 168
      $sql .= ",semaphored_job_count=" . $stats->semaphored_job_count();
      $sql .= ",ready_job_count=" . $stats->ready_job_count();
169 170 171
      $sql .= ",done_job_count=" . $stats->done_job_count();
      $sql .= ",failed_job_count=" . $stats->failed_job_count();

172 173 174
      $stats->num_running_workers( $self->db->get_Queen->count_running_workers( $stats->analysis_id() ) );
      $sql .= ",num_running_workers=" . $stats->num_running_workers();
  }
175

176
  $sql .= ",num_required_workers=" . $stats->num_required_workers();
177
  $sql .= ",last_update=CURRENT_TIMESTAMP";
Leo Gordon's avatar
Leo Gordon committed
178
  $sql .= ",sync_lock='0'";
179 180 181 182 183
  $sql .= " WHERE analysis_id='".$stats->analysis_id."' ";

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
184
  $sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT CURRENT_TIMESTAMP, analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id);
185 186
  $sth->execute();
  $sth->finish;
187 188 189 190
  $stats->seconds_since_last_update(0); #not exact but good enough :)
}


191
sub update_status {
192 193 194 195 196 197 198 199 200 201 202
  my ($self, $analysis_id, $status) = @_;

  my $sql = "UPDATE analysis_stats SET status='$status' ";
  $sql .= " WHERE analysis_id='$analysis_id' ";

  my $sth = $self->prepare($sql);
  $sth->execute();
  $sth->finish;
}


203 204 205 206 207
=head2 interval_update_work_done

  Arg [1]     : int $analysis_id
  Arg [2]     : int $jobs_done_in_interval
  Arg [3]     : int $interval_msec
208 209 210 211 212
  Arg [4]     : int $fetching_msec
  Arg [5]     : int $running_msec
  Arg [6]     : int $writing_msec
  Arg [7]     : real $weight_factor [optional]
  Example     : $statsDBA->interval_update_work_done($analysis_id, $jobs_done, $interval_msec, $fetching_msec, $running_msec, $writing_msec);
213 214 215 216 217 218 219
  Description : does a database update to recalculate the avg_msec_per_job and done_job_count
                does an interval equation by multiplying out the previous done_job_count with the
                previous avg_msec_per_job and then expanding by new interval values to give a better average.
  Caller      : Bio::EnsEMBL::Hive::Worker

=cut

220 221
sub interval_update_work_done {
  my ($self, $analysis_id, $job_count, $interval_msec, $fetching_msec, $running_msec, $writing_msec, $weight_factor) = @_;
222 223

  $weight_factor ||= 3; # makes it more sensitive to the dynamics of the farm
224

225 226 227 228 229 230 231 232 233 234
  my $sql = $self->db->hive_use_triggers()
  ? qq{
    UPDATE analysis_stats SET
        avg_msec_per_job = (((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_input_msec_per_job = (((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_run_msec_per_job = (((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_output_msec_per_job = (((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count))
    WHERE analysis_id= $analysis_id
  }
  : qq{
235 236 237 238 239
    UPDATE analysis_stats SET
        avg_msec_per_job = (((done_job_count*avg_msec_per_job)/$weight_factor + $interval_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_input_msec_per_job = (((done_job_count*avg_input_msec_per_job)/$weight_factor + $fetching_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_run_msec_per_job = (((done_job_count*avg_run_msec_per_job)/$weight_factor + $running_msec) / (done_job_count/$weight_factor + $job_count)), 
        avg_output_msec_per_job = (((done_job_count*avg_output_msec_per_job)/$weight_factor + $writing_msec) / (done_job_count/$weight_factor + $job_count)), 
240
        ready_job_count = ready_job_count - $job_count, 
Leo Gordon's avatar
Leo Gordon committed
241
        done_job_count = done_job_count + $job_count 
242 243
    WHERE analysis_id= $analysis_id
  };
244

245 246 247 248
  $self->dbc->do($sql);
}


249
sub increase_running_workers {
250 251 252 253 254 255 256 257 258 259
  my $self = shift;
  my $analysis_id = shift;

  my $sql = "UPDATE analysis_stats SET num_running_workers = num_running_workers + 1 ".
      " WHERE analysis_id='$analysis_id'";

  $self->dbc->do($sql);
}


260
sub decrease_running_workers {
261 262 263 264 265 266 267 268 269
  my $self = shift;
  my $analysis_id = shift;

  my $sql = "UPDATE analysis_stats SET num_running_workers = num_running_workers - 1 ".
      " WHERE analysis_id='$analysis_id'";

  $self->dbc->do($sql);
}

270 271

sub decrease_required_workers {
272 273 274
  my $self = shift;
  my $analysis_id = shift;

275 276
  my $sql = "UPDATE analysis_stats SET num_required_workers=num_required_workers-1 ".
            "WHERE analysis_id='$analysis_id' ";
277

278 279 280 281
  $self->dbc->do($sql);
}


282
sub increase_required_workers {
283 284 285 286 287 288 289
  my $self = shift;
  my $analysis_id = shift;

  my $sql = "UPDATE analysis_stats SET num_required_workers=num_required_workers+1 ".
            "WHERE analysis_id='$analysis_id' ";

  $self->dbc->do($sql);
290 291 292
}


293 294
1;