Commit a3645169 authored by Leo Gordon
Browse files

logging of deadlocks via callback

parent 1e402fa3
......@@ -248,7 +248,9 @@ sub decrease_semaphore_count_for_jobid { # used in semaphore annihilation or
WHERE job_id=? AND status='SEMAPHORED'
};
$self->dbc->protected_prepare_execute( [ $sql, $dec, $jobid ] );
$self->dbc->protected_prepare_execute( [ $sql, $dec, $jobid ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_hive_message( 'decreasing semaphore_count'.$after, 0 ); }
);
}
sub increase_semaphore_count_for_jobid { # used in semaphore propagation
......@@ -262,7 +264,9 @@ sub increase_semaphore_count_for_jobid { # used in semaphore propagation
WHERE job_id=?
};
$self->dbc->protected_prepare_execute( [ $sql, $inc, $jobid ] );
$self->dbc->protected_prepare_execute( [ $sql, $inc, $jobid ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_hive_message( 'increasing semaphore_count'.$after, 0 ); }
);
}
......@@ -280,6 +284,8 @@ sub increase_semaphore_count_for_jobid { # used in semaphore propagation
sub check_in_job {
my ($self, $job) = @_;
my $job_id = $job->dbID;
my $sql = "UPDATE job SET status='".$job->status."' ";
if($job->status eq 'DONE') {
......@@ -291,10 +297,12 @@ sub check_in_job {
} elsif($job->status eq 'READY') {
}
$sql .= " WHERE job_id='".$job->dbID."' ";
$sql .= " WHERE job_id='$job_id' ";
# This particular query is infamous for collisions and 'deadlock' situations; let's wait and retry:
$self->dbc->protected_prepare_execute( [ $sql ] );
$self->dbc->protected_prepare_execute( [ $sql ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_hive_message( "checking the job #$job_id in".$after, 0 ); }
);
}
......@@ -415,14 +423,22 @@ sub grab_jobs_for_role {
AND status='READY'
};
my $claim_count;
# we have to be explicitly numeric here because of '0E0' value returned by DBI if "no rows have been affected":
if( (my $claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $virgin_sql . $limit_sql . $offset_sql . $suffix_sql ] )) == 0 ) {
if( ($claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $limit_sql . $offset_sql . $suffix_sql ] )) == 0 ) {
$claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $limit_sql . $suffix_sql ] );
if( 0 == ($claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $virgin_sql . $limit_sql . $offset_sql . $suffix_sql ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_worker_message( $role->worker, "grabbing a virgin batch of offset jobs".$after, 0 ); }
))) {
if( 0 == ($claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $limit_sql . $offset_sql . $suffix_sql ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_worker_message( $role->worker, "grabbing a non-virgin batch of offset jobs".$after, 0 ); }
))) {
$claim_count = $self->dbc->protected_prepare_execute( [ $prefix_sql . $limit_sql . $suffix_sql ],
sub { my ($after) = @_; $self->db->get_LogMessageAdaptor->store_worker_message( $role->worker, "grabbing a non-virgin batch of non-offset jobs".$after, 0 ); }
);
}
}
return $self->fetch_all_by_role_id_AND_status($role_id, 'CLAIMED') ;
return $claim_count ? $self->fetch_all_by_role_id_AND_status($role_id, 'CLAIMED') : [];
}
......
......@@ -116,6 +116,7 @@ sub url {
sub protected_prepare_execute { # try to resolve certain mysql "Deadlocks" by trying again (a useful workaround even in mysql 5.1.61)
my $self = shift @_;
my $sql_params = shift @_;
my $deadlock_log_callback = shift @_;
my $sql_cmd = shift @$sql_params;
......@@ -134,17 +135,11 @@ sub protected_prepare_execute { # try to resolve certain mysql "Deadlocks" b
} or do {
if($@ =~ /Deadlock found when trying to get lock; try restarting transaction/) { # ignore this particular error
unless($log_message_adaptor) {
require Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
my $slave_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(
-dbconn => $self,
-no_sql_schema_version_check => 1,
);
$log_message_adaptor = $slave_dba->get_LogMessageAdaptor();
}
my $this_sleep_sec = rand( $sleep_max_sec );
$log_message_adaptor->store_hive_message( "Caught a DEADLOCK when trying to execute '$sql_cmd' (attempt #$attempt), retrying in $this_sleep_sec sec", 0 );
if( $deadlock_log_callback ) {
$deadlock_log_callback->( " temporarily failed due to a DEADLOCK in the database (attempt #$attempt). Will try again in $this_sleep_sec sec" );
}
usleep( $this_sleep_sec*1000000 );
$sleep_max_sec *= 2;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment