Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl-hive
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
7
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl-hive
Commits
259b97dc
Commit
259b97dc
authored
19 years ago
by
Jessica Severin
Browse files
Options
Downloads
Patches
Plain Diff
removed lsf_beekeeper.pl since it has been replaced by beekeeper.pl
parent
c01a3433
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
scripts/lsf_beekeeper.pl
+0
-367
0 additions, 367 deletions
scripts/lsf_beekeeper.pl
with
0 additions
and
367 deletions
scripts/lsf_beekeeper.pl
deleted
100755 → 0
+
0
−
367
View file @
c01a3433
#!/usr/local/ensembl/bin/perl -w
use
strict
;
use
DBI
;
use
Getopt::
Long
;
use
Bio::EnsEMBL::Hive::DBSQL::
DBAdaptor
;
use
Bio::EnsEMBL::Hive::
Worker
;
use
Bio::EnsEMBL::Hive::
Queen
;
use
Bio::EnsEMBL::Hive::
URLFactory
;
use
Sys::
Hostname
;
use
Bio::EnsEMBL::Hive::DBSQL::
AnalysisCtrlRuleAdaptor
;
# ok this is a hack, but I'm going to pretend I've got an object here
# by creating a blessed hash ref and passing it around like an object
# this is to avoid using global variables in functions, and to consolidate
# the globals into a nice '$self' package
my
$self
=
bless
{};
$self
->
{'
db_conf
'}
=
{};
$self
->
{'
db_conf
'}
->
{'
-user
'}
=
'
ensro
';
$self
->
{'
db_conf
'}
->
{'
-port
'}
=
3306
;
$self
->
{'
max_loops
'}
=
0
;
#unlimited
$self
->
{'
beekeeper_type
'}
=
'
LSF
';
$|
=
1
;
my
$conf_file
;
my
(
$help
,
$host
,
$user
,
$pass
,
$dbname
,
$port
,
$adaptor
,
$url
);
my
(
$job_limit
,
$batch_size
);
my
$loopit
=
0
;
my
$worker_limit
=
50
;
my
$sleep_time
=
5
;
my
$sync
=
0
;
$self
->
{'
overdue_limit
'}
=
75
;
#minutes
$self
->
{'
show_analysis_stats
'}
=
undef
;
$self
->
{'
show_worker_stats
'}
=
undef
;
GetOptions
('
help
'
=>
\
$help
,
'
url=s
'
=>
\
$url
,
'
conf=s
'
=>
\
$conf_file
,
'
dbhost=s
'
=>
\
$host
,
'
dbport=i
'
=>
\
$port
,
'
dbuser=s
'
=>
\
$user
,
'
dbpass=s
'
=>
\
$pass
,
'
dbname=s
'
=>
\
$dbname
,
'
dead
'
=>
\
$self
->
{'
check_for_dead
'},
'
overdue
'
=>
\
$self
->
{'
overdue_limit
'},
'
alldead
'
=>
\
$self
->
{'
all_dead
'},
'
run
'
=>
\
$self
->
{'
run
'},
'
jlimit=i
'
=>
\
$job_limit
,
'
wlimit=i
'
=>
\
$worker_limit
,
'
batch_size=i
'
=>
\
$batch_size
,
'
loop
'
=>
\
$loopit
,
'
no_pend
'
=>
\
$self
->
{'
no_pend_adjust
'},
'
sync
'
=>
\
$sync
,
'
analysis_stats
'
=>
\
$self
->
{'
show_analysis_stats
'},
'
worker_stats
'
=>
\
$self
->
{'
show_worker_stats
'},
'
sleep=i
'
=>
\
$sleep_time
,
'
logic_name=s
'
=>
\
$self
->
{'
logic_name
'},
'
failed_jobs
'
=>
\
$self
->
{'
show_failed_jobs
'},
'
reset_job_id=i
'
=>
\
$self
->
{'
reset_job_id
'},
'
reset_all_jobs_for_analysis_id=i
'
=>
\
$self
->
{'
reset_all_jobs_for_analysis_id
'},
'
m=s
'
=>
\
$self
->
{'
lsf_machine_option
'},
);
if
(
$help
)
{
usage
();
}
parse_conf
(
$self
,
$conf_file
);
if
(
$self
->
{'
run
'})
{
$loopit
=
1
;
$self
->
{'
max_loops
'}
=
1
;
}
my
$DBA
;
if
(
$url
)
{
$DBA
=
Bio::EnsEMBL::Hive::
URLFactory
->
fetch
(
$url
);
die
("
Unable to connect to
$url
\n
")
unless
(
$DBA
);
}
else
{
if
(
$host
)
{
$self
->
{'
db_conf
'}
->
{'
-host
'}
=
$host
;
}
if
(
$port
)
{
$self
->
{'
db_conf
'}
->
{'
-port
'}
=
$port
;
}
if
(
$dbname
)
{
$self
->
{'
db_conf
'}
->
{'
-dbname
'}
=
$dbname
;
}
if
(
$user
)
{
$self
->
{'
db_conf
'}
->
{'
-user
'}
=
$user
;
}
if
(
$pass
)
{
$self
->
{'
db_conf
'}
->
{'
-pass
'}
=
$pass
;
}
unless
(
defined
(
$self
->
{'
db_conf
'}
->
{'
-host
'})
and
defined
(
$self
->
{'
db_conf
'}
->
{'
-user
'})
and
defined
(
$self
->
{'
db_conf
'}
->
{'
-dbname
'}))
{
print
"
\n
ERROR : must specify host, user, and database to connect
\n\n
";
usage
();
}
# connect to database specified
$DBA
=
new
Bio::EnsEMBL::Hive::DBSQL::
DBAdaptor
(
%
{
$self
->
{'
db_conf
'}});
}
$self
->
{'
dba
'}
=
$DBA
;
my
$queen
=
$DBA
->
get_Queen
;
if
(
$self
->
{'
reset_job_id
'})
{
$queen
->
reset_and_fetch_job_by_dbID
(
$self
->
{'
reset_job_id
'});
};
if
(
$self
->
{'
reset_all_jobs_for_analysis_id
'})
{
reset_all_jobs_for_analysis_id
(
$self
);
}
if
(
$self
->
{'
all_dead
'})
{
register_all_workers_dead
(
$self
,
$queen
);
}
if
(
$self
->
{'
check_for_dead
'})
{
check_for_dead_workers
(
$self
,
$queen
);
}
my
$analysis
=
$DBA
->
get_AnalysisAdaptor
->
fetch_by_logic_name
(
$self
->
{'
logic_name
'});
if
(
$loopit
)
{
run_autonomously
(
$self
,
$queen
);
}
elsif
(
$analysis
)
{
my
$stats
=
$analysis
->
stats
;
if
(
$sync
)
{
$queen
->
synchronize_AnalysisStats
(
$stats
);
$queen
->
check_blocking_control_rules_for_AnalysisStats
(
$stats
);
}
$stats
->
print_stats
;
}
else
{
$queen
->
synchronize_hive
()
if
(
$sync
);
$queen
->
print_analysis_status
if
(
$self
->
{'
show_analysis_stats
'});
$queen
->
print_running_worker_status
;
show_running_workers
(
$self
)
if
(
$self
->
{'
show_worker_stats
'});
$queen
->
get_num_needed_workers
();
$queen
->
get_hive_progress
();
show_failed_jobs
(
$self
)
if
(
$self
->
{'
show_failed_jobs
'});
}
exit
(
0
);
#######################
#
# subroutines
#
#######################
sub
usage
{
print
"
lsf_beekeeper.pl [options]
\n
";
print
"
-help : print this help
\n
";
print
"
-url <url string> : url defining where hive database is located
\n
";
print
"
-conf <path> : config file describing db connection
\n
";
print
"
-dbhost <machine> : mysql database host <machine>
\n
";
print
"
-dbport <port#> : mysql port number
\n
";
print
"
-dbname <name> : mysql database <name>
\n
";
print
"
-dbuser <name> : mysql connection user <name>
\n
";
print
"
-dbpass <pass> : mysql connection password
\n
";
print
"
-batch_size <num> : #jobs a worker can claim at once
\n
";
print
"
-jlimit <num> : #jobs to run before worker can die naturally
\n
";
print
"
-dead : clean overdue jobs for resubmission
\n
";
print
"
-overdue <min> : worker overdue minutes checking if dead
\n
";
print
"
-alldead : all outstanding workers
\n
";
print
"
-run : run 1 iteration of automation loop
\n
";
print
"
-loop : run autonomously, loops and sleeps
\n
";
print
"
-no_pend : don't adjust needed workers by pending workers
\n
";
print
"
-m <string> : passes <string> to LSF bsub -m option
\n
";
print
"
-sleep <num> : when looping, sleep <num> minutes (default 5)
\n
";
print
"
-wlimit <num> : max # workers to create per loop
\n
";
print
"
-analysis_stats : show status of each analysis
\n
";
print
"
-worker_stats : show status of each running worker
\n
";
print
"
-failed_jobs : show all failed jobs
\n
";
print
"
-reset_job_id <num> : reset a job back to READY so it can be rerun
\n
";
print
"
-reset_all_jobs_for_analysis_id <num>
\n
";
print
"
: reset jobs back to READY so it can be rerun
\n
";
print
"
lsf_beekeeper.pl v1.5
\n
";
exit
(
1
);
}
sub
parse_conf
{
my
$self
=
shift
;
my
$conf_file
=
shift
;
if
(
$conf_file
and
(
-
e
$conf_file
))
{
#read configuration file from disk
my
@conf_list
=
@
{
do
$conf_file
};
foreach
my
$confPtr
(
@conf_list
)
{
#print("HANDLE type " . $confPtr->{TYPE} . "\n");
if
((
$confPtr
->
{
TYPE
}
eq
'
COMPARA
')
or
(
$confPtr
->
{
TYPE
}
eq
'
DATABASE
'))
{
$self
->
{'
db_conf
'}
=
$confPtr
;
}
}
}
}
sub
check_for_dead_workers
{
my
$self
=
shift
;
my
$queen
=
shift
;
print
("
===== check for dead workers
\n
");
my
$overdueWorkers
=
$queen
->
fetch_overdue_workers
(
$self
->
{'
overdue_limit
'}
*
60
);
print
(
scalar
(
@
{
$overdueWorkers
}),
"
overdue workers
\n
");
foreach
my
$worker
(
@
{
$overdueWorkers
})
{
if
(
$worker
->
beekeeper
eq
'
LSF
')
{
printf
("
%10d %35s %15s %20s(%d) :
",
$worker
->
hive_id
,
$worker
->
host
,
$worker
->
process_id
,
$worker
->
analysis
->
logic_name
,
$worker
->
analysis
->
dbID
);
my
$cmd
=
"
bjobs
"
.
$worker
->
process_id
.
"
2>&1 | grep -v 'not found' | grep -v JOBID | grep -v EXIT
";
#print(" check worker with : $cmd\n");
my
$check
=
qx/$cmd/
;
unless
(
$check
)
{
print
("
worker is missing => it DIED!!
\n
");
$queen
->
register_worker_death
(
$worker
);
}
else
{
print
("
ALIVE and running
\n
");
}
}
}
}
sub
register_all_workers_dead
{
my
$self
=
shift
;
my
$queen
=
shift
;
my
$overdueWorkers
=
$queen
->
fetch_overdue_workers
(
0
);
foreach
my
$worker
(
@
{
$overdueWorkers
})
{
$queen
->
register_worker_death
(
$worker
);
}
}
sub
show_overdue_workers
{
my
$self
=
shift
;
my
$queen
=
shift
;
print
("
===== overdue workers
\n
");
my
$overdueWorkers
=
$queen
->
fetch_overdue_workers
(
$self
->
{'
overdue_limit
'}
*
60
);
foreach
my
$worker
(
@
{
$overdueWorkers
})
{
printf
("
%10d %35s %15s %20s(%d)
\n
",
$worker
->
hive_id
,
$worker
->
host
,
$worker
->
process_id
,
$worker
->
analysis
->
logic_name
,
$worker
->
analysis
->
dbID
);
}
}
sub
show_running_workers
{
my
$self
=
shift
;
my
$queen
=
$self
->
{'
dba
'}
->
get_Queen
;
print
("
===== running workers
\n
");
my
$worker_list
=
$queen
->
fetch_overdue_workers
(
0
);
foreach
my
$worker
(
@
{
$worker_list
})
{
printf
("
%10d %35s(%5d) %5s:%15s %15s (%s)
\n
",
$worker
->
hive_id
,
$worker
->
analysis
->
logic_name
,
$worker
->
analysis
->
dbID
,
$worker
->
beekeeper
,
$worker
->
process_id
,
$worker
->
host
,
$worker
->
last_check_in
);
}
}
sub
show_failed_jobs
{
my
$self
=
shift
;
print
("
===== failed jobs
\n
");
my
$failed_job_list
=
$self
->
{'
dba
'}
->
get_AnalysisJobAdaptor
->
fetch_all_failed_jobs
;
foreach
my
$job
(
@
{
$failed_job_list
})
{
my
$analysis
=
$self
->
{'
dba
'}
->
get_AnalysisAdaptor
->
fetch_by_dbID
(
$job
->
analysis_id
);
printf
("
job_id=%d %35s(%5d) input_id='%s'
\n
",
$job
->
dbID
,
$analysis
->
logic_name
,
$analysis
->
dbID
,
$job
->
input_id
);
}
}
sub
run_autonomously
{
my
$self
=
shift
;
my
$queen
=
shift
;
unless
(`
runWorker.pl
`)
{
print
("
can't find runWorker.pl script. Please make sure it's in your path
\n
");
exit
(
1
);
}
my
(
$cmd
,
$worker_cmd
);
my
$loopCount
=
1
;
while
(
$loopit
)
{
print
("
\n
=======lsf_beekeeper loop **
$loopCount
**==========
\n
");
check_for_dead_workers
(
$self
,
$queen
);
$queen
->
print_analysis_status
if
(
$self
->
{'
show_analysis_stats
'});
$queen
->
print_running_worker_status
;
my
$runCount
=
$queen
->
get_num_running_workers
();
my
$load
=
$queen
->
get_hive_current_load
();
my
$count
=
$queen
->
get_num_needed_workers
();
my
$pend_count
=
$self
->
get_pending_count
();
$count
=
$count
-
$pend_count
;
if
(
$load
==
0
and
$count
==
0
and
$runCount
==
0
)
{
#nothing running and nothing todo => do hard resync
print
("
*** nothing is happening => do a hard resync
\n
");
$queen
->
synchronize_hive
();
$count
=
$queen
->
get_num_needed_workers
();
}
$count
=
$worker_limit
if
(
$count
>
$worker_limit
);
if
(
$count
>
0
)
{
print
("
need
$count
workers
\n
");
$worker_cmd
=
"
runWorker.pl -bk LSF -url
$url
";
$worker_cmd
.=
"
-limit
$job_limit
"
if
(
defined
$job_limit
);
$worker_cmd
.=
"
-batch_size
$batch_size
"
if
(
defined
$batch_size
);
if
(
$count
>
1
)
{
$cmd
=
"
bsub -JHL
$loopCount
\
[1-
$count
\
]
";}
else
{
$cmd
=
"
bsub -JHL
$loopCount
";}
$cmd
.=
"
-m '
"
.
$self
->
{'
lsf_machine_option
'}
.
"
'
"
if
(
$self
->
{'
lsf_machine_option
'});
$cmd
.=
"
"
.
$worker_cmd
;
print
("
$cmd
\n
");
system
(
$cmd
);
}
$queen
->
get_hive_progress
();
last
if
(
$self
->
{'
max_loops
'}
>
0
and
(
$loopCount
>=
$self
->
{'
max_loops
'}));
$DBA
->
dbc
->
disconnect_if_idle
;
print
("
sleep
$sleep_time
minutes. Next loop at
",
scalar
localtime
(
time
+
$sleep_time
*
60
),"
.
\n
");
sleep
(
$sleep_time
*
60
);
$loopCount
++
;
}
printf
("
dbc %d disconnect cycles
\n
",
$DBA
->
dbc
->
disconnect_count
);
}
sub
get_pending_count
{
my
$self
=
shift
;
return
0
if
(
$self
->
{'
no_pend_adjust
'});
my
$cmd
=
"
bjobs | grep -c PEND
";
my
$pend_count
=
qx/$cmd/
;
chomp
(
$pend_count
);
print
("
$pend_count
workers queued but not running
\n
");
return
$pend_count
;
}
sub
reset_all_jobs_for_analysis_id
{
my
$self
=
shift
;
my
$analysis
=
$self
->
{'
dba
'}
->
get_AnalysisAdaptor
->
fetch_by_dbID
(
$self
->
{'
reset_all_jobs_for_analysis_id
'});
$self
->
{'
dba
'}
->
get_AnalysisJobAdaptor
->
reset_all_jobs_for_analysis_id
(
$analysis
->
dbID
);
$self
->
{'
dba
'}
->
get_Queen
->
synchronize_AnalysisStats
(
$analysis
->
stats
);
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment