<div class="line"> my $collection = <a class="code" href="class_bio_1_1_ens_e_m_b_l_1_1_hive_1_1_dataflow_rule.html">Bio::EnsEMBL::Hive::DataflowRule</a>-><a class="code" href="class_bio_1_1_ens_e_m_b_l_1_1_hive_1_1_cacheable.html#a1c4430cb41931838b7bcf32ddc0e76a6">collection</a>();</div>
<div class="line"> ? <span class="stringliteral">"status = CAST(CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END AS jw_status) "</span></div>
<div class="line"> : <span class="stringliteral">"status = CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END "</span></div>
<div class="line"> ).<span class="stringliteral">" WHERE job_id=? AND status='SEMAPHORED'"</span>;</div>
<div class="line"> ).<span class="stringliteral">" WHERE job_id=? AND status IN ('SEMAPHORED', 'READY')"</span>;</div>
<div class="line"></div>
<div class="line"> my $find_sth = $self->prepare($find_sql);</div>
<div class="line"> my $update_sth = $self->prepare($update_sql);</div>
...
...
@@ -901,6 +901,11 @@ Description</h1>
<div class="line"> WHERE job_id=$job_id</div>
<div class="line"> AND status in (<span class="stringliteral">'CLAIMED'</span>,<span class="stringliteral">'PRE_CLEANUP'</span>,<span class="stringliteral">'FETCH_INPUT'</span>,<span class="stringliteral">'RUN'</span>,<span class="stringliteral">'WRITE_OUTPUT'</span>,<span class="stringliteral">'POST_CLEANUP'</span>)</div>
<div class="line"> } );</div>
<div class="line"></div>
<div class="line"><span class="preprocessor"> # FIXME: move the decision making completely to the API side and so avoid the potential race condition.</span></div>
<div class="line"><span class="preprocessor"></span> my $job = $self->fetch_by_dbID( $job_id );</div>
<div class="line"> die <span class="stringliteral">"Database name '$dbname' is too long (> $limits{$driver}). Cannot create the database\n"</span>;</div>
<div class="line"><span class="keywordflow">return</span> $self->fetch_all( <span class="stringliteral">"JOIN job USING(role_id) WHERE when_finished IS NOT NULL AND status NOT IN ('DONE', 'READY', 'FAILED', 'PASSED_ON') GROUP BY role_id"</span> );</div>
<div class="line"><span class="preprocessor"> # the list should contain all status'es that are not "in progress":</span></div>
<div class="line"><span class="preprocessor"></span><span class="keywordflow">return</span> $self->fetch_all( <span class="stringliteral">"JOIN job USING(role_id) WHERE when_finished IS NOT NULL AND status NOT IN ('DONE', 'READY', 'FAILED', 'PASSED_ON', 'SEMAPHORED') GROUP BY role_id"</span> );</div>
<div class="line"><span class="stringliteral">'TERM_OWNER'</span> =><span class="stringliteral">'KILLED_BY_USER'</span>, # bkill (wait until it dies)</div>
<div class="fragment"><div class="line">sub <a class="code" href="class_bio_1_1_ens_e_m_b_l_1_1_hive_1_1_queen.html#a020be70f3da4eca961fef537e68daca6">check_for_dead_workers</a> { # scans the whole Valley <span class="keywordflow">for</span> lost Workers (but ignores unreachable ones)</div>
<div class="line"> my ($self, $valley, $check_buried_in_haste) = @_;</div>
<div class="line"> my ($self, $valley, $check_buried_in_haste, $bury_unkwn_workers) = @_;</div>
<div class="line"></div>
<div class="line"> my $last_few_seconds = 5; # FIXME: It is probably a good idea to expose <span class="keyword">this</span> parameter <span class="keywordflow">for</span> easier tuning.</div>
<div class="line"></div>
...
...
@@ -336,80 +336,95 @@ Description</h1>
<div class="line"></div>
<div class="line"> my $meadow_type = $worker->meadow_type;</div>
<div class="line"> my $process_id = $worker->process_id;</div>
<div class="line"><span class="keywordflow">if</span>(my $status = $pid_to_worker_status->{$process_id}) { # can be RUN|PEND|xSUSP</div>
<div class="line"> $meadow_status_counts{$meadow_signature}{<span class="stringliteral">'UNREACHABLE'</span>}++; # Worker is unreachable from <span class="keyword">this</span> Valley</div>
<div class="line"> die <span class="stringliteral">"Your Meadow::$meadow_type driver now has to support get_report_entries_for_process_ids() method instead of find_out_causes(). Please update it.\n"</span>;</div>
<div class="line"> warn <span class="stringliteral">"GarbageCollector:\t$meadow_type meadow does not support post-mortem examination\n"</span>;</div>
<div class="line"> }</div>
<div class="line"></div>
<div class="line"> warn <span class="stringliteral">"GarbageCollector:\tReleasing the jobs\n"</span>;</div>
<div class="line"><span class="keywordflow">while</span>(my ($process_id, $worker) = each %$pid_to_lost_worker) {</div>
<div class="line"><span class="keywordflow">if</span>( %$report_entries ) { # use the opportunity to also <a class="code" href="class_bio_1_1_ens_e_m_b_l_1_1_hive_1_1_d_b_s_q_l_1_1_base_adaptor.html#a23e67bcbf6294126212da34c0c5a2a90">store</a> resource <a class="code" href="all__tests_8pl.html#a9449f3118943816afb79e9c3394972ca">usage</a> of the buried workers:</div>
<div class="line"><span class="keywordflow">if</span>($worker->meadow_user eq $ENV{<span class="stringliteral">'USER'</span>}) { # <span class="keywordflow">if</span> I<span class="stringliteral">'m actually allowed to kill the worker...</span></div>
<div class="line"><span class="stringliteral"> warn "GarbageCollector:\tKilling/forgetting the UNKWN worker by process_id $process_id";</span></div>
<div class="line"><span class="stringliteral"> $meadow_status_counts{$meadow_signature}{'</span>UNREACHABLE<span class="stringliteral">'}++; # Worker is unreachable from this Valley</span></div>
<div class="line"><span class="stringliteral"> die "Your Meadow::$meadow_type driver now has to support get_report_entries_for_process_ids() method instead of find_out_causes(). Please update it.\n";</span></div>
<div class="line"><span class="stringliteral"> warn "GarbageCollector:\t$meadow_type meadow does not support post-mortem examination\n";</span></div>
<div class="line"><span class="stringliteral"> if( %$report_entries ) { # use the opportunity to also store resource usage of the buried workers:</span></div>
<div class="line"><span class="stringliteral"> warn "GarbageCollector:\tChecking for Workers/Roles buried in haste...\n";</span></div>
<div class="line"><span class="stringliteral"> my $buried_in_haste_list = $self->db->get_RoleAdaptor->fetch_all_finished_roles_with_unfinished_jobs();</span></div>
<div class="line"> $self->dbc and $self->dbc->disconnect_when_inactive(1); # release <span class="keyword">this</span> connection <span class="keywordflow">for</span> the duration of system() call</div>
<div class="line"> $self->dbc and $self->dbc->disconnect_when_inactive(0); # allow the <a class="code" href="class_bio_1_1_ens_e_m_b_l_1_1_hive_1_1_process.html#a9d01ac47d41d31f30aca69b0220e0d9d">worker</a> to keep the connection open again</div>
<div class="line"></div>
...
...
@@ -321,7 +320,16 @@ Description</h1>
<div class="line"> my $stderr = $self->param(<span class="stringliteral">'stderr'</span>);</div>
<div class="line"> my $flat_cmd = $self->param(<span class="stringliteral">'flat_cmd'</span>);</div>
<div class="line"><span class="preprocessor"> # If we reach this point, perhaps it was killed by a user</span></div>
<div class="line"><span class="preprocessor"></span> die sprintf( <span class="stringliteral">"'%s' was killed with code=%d\nstderr is: %s\n"</span>, $flat_cmd, $return_value, $stderr);</div>
<div class="line"></div>
<div class="line"> } elsif ($return_value) {</div>
<div class="line"><span class="preprocessor"> # "Normal" process exit with a non-zero code</span></div>