Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl-hive
Commits
b44b5e3f
Commit
b44b5e3f
authored
Apr 27, 2009
by
Leo Gordon
Browse files
this script has to be available to all compara and needed a bit of fixing
parent
576d38f2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
302 additions
and
0 deletions
+302
-0
scripts/cmd_hive.pl
scripts/cmd_hive.pl
+302
-0
No files found.
scripts/cmd_hive.pl
0 → 100755
View file @
b44b5e3f
#!/usr/local/bin/perl -w
# cmd_hive.pl
#
# Cared for by Albert Vilella <>
#
# Copyright Albert Vilella
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
cmd_hive.pl - create Hive jobs that run a system command expanded from a template
=head1 SYNOPSIS
perl \
/nfs/acari/avilella/src/ensembl_main/ensembl-personal/avilella/hive/cmd_hive.pl \
-url mysql://user:password@mysqldb:port/name_of_hive_db -logic_name \
example1 -input_id 'echo I.have.$suffix.$tag.and.I.am.baking.one.right.now' \
-suffix_a apple01 -suffix_b apple05 -tag pies
=head1 DESCRIPTION
Simple SystemCmd call through Bio::EnsEMBL::Hive with $suffix ranging
from suffix_a to suffix_b, looping in a ($alfa..$beta) fashion. $tag is for
fixed elements.
Always use single quotes for input_id.
Be careful of using things that don't expand, like apple_01 apple_05
instead of apple01 apple05
Also don't use suffix_a and suffix_b in the reverse order apple05
to apple01 because they expand in things like:
apple54,applf04,applf54,applg04,applg54,applh04,applh54...
If using hashed, call with something like:
[-hashed_a 00:00:00]
[-hashed_b 01:61:67]
=head1 AUTHOR - Albert Vilella
Email
Describe contact details here
=head1 CONTRIBUTORS
Additional contributors names and emails here
=cut
# Let the code begin...
use
strict
;
use
DBI
;
use
Getopt::
Long
;
use
Bio::EnsEMBL::Hive::DBSQL::
DBAdaptor
;
use
Bio::EnsEMBL::Hive::
Worker
;
use
Bio::EnsEMBL::Hive::
Queen
;
use
Time::
HiRes
qw(time gettimeofday tv_interval)
;
use
Bio::EnsEMBL::Hive::RunnableDB::
SystemCmd
;
#use Data::UUID;
# ok this is a hack, but I'm going to pretend I've got an object here
# by creating a blessed hash ref and passing it around like an object
# this is to avoid using global variables in functions, and to consolidate
# the globals into a nice '$self' package
# All script-wide state lives in one hash that is blessed into the current
# package, so the subroutines below can be invoked as methods on it
# (e.g. $self->resolve_suffix()).
my $self = bless {}, __PACKAGE__;

# Connection defaults; -host and -dbname have no default and must be supplied
# (or a -url given instead).
$self->{'db_conf'} = {
  '-user' => 'ensro',
  '-port' => 3306,
};

$self->{'analysis_id'} = undef;
$self->{'logic_name'}  = undef;
$self->{'outdir'}      = undef;
$self->{'beekeeper'}   = undef;
$self->{'process_id'}  = undef;

my $conf_file;
my ($help, $host, $user, $pass, $dbname, $port, $adaptor, $url);

# Stop on a bad command line: GetOptions has already reported the problem on
# STDERR, and carrying on could insert jobs built from mis-parsed options.
GetOptions(
  'help'            => \$help,
  'url=s'           => \$url,
  'conf=s'          => \$conf_file,
  'dbhost=s'        => \$host,
  'dbport=i'        => \$port,
  'dbuser=s'        => \$user,
  'dbpass=s'        => \$pass,
  'dbname=s'        => \$dbname,
  'analysis_id=i'   => \$self->{'analysis_id'},
  'logic_name=s'    => \$self->{'logic_name'},
  'limit=i'         => \$self->{'job_limit'},
  'lifespan=i'      => \$self->{'lifespan'},
  'outdir=s'        => \$self->{'outdir'},
  'bk=s'            => \$self->{'beekeeper'},
  'pid=s'           => \$self->{'process_id'},
  'input_id=s'      => \$self->{'input_id'},
  'inputfile=s'     => \$self->{'inputfile'},
  'suffix_a=s'      => \$self->{'suffix_a'},
  'suffix_b=s'      => \$self->{'suffix_b'},
  'hashed_a=s'      => \$self->{'hashed_a'},
  'hashed_b=s'      => \$self->{'hashed_b'},
  'tag=s'           => \$self->{'tag'},
  'hive_capacity=s' => \$self->{'hive_capacity'},
  'batch_size=s'    => \$self->{'batch_size'},
  'debug=s'         => \$self->{'debug'},
) or usage();

# A leftover positional argument is taken as the analysis id.  At file scope
# @_ is always empty, so the remaining arguments have to be looked up in @ARGV.
$self->{'analysis_id'} = shift(@ARGV) if (@ARGV);

usage() if ($help);

my $DBA;
if ($url) {
  # URLFactory is not among the modules loaded at the top of the file.
  require Bio::EnsEMBL::Hive::URLFactory;
  $DBA = Bio::EnsEMBL::Hive::URLFactory->fetch($url);
}
else {
  $self->{'db_conf'}->{'-host'}   = $host   if ($host);
  $self->{'db_conf'}->{'-port'}   = $port   if ($port);
  $self->{'db_conf'}->{'-dbname'} = $dbname if ($dbname);
  $self->{'db_conf'}->{'-user'}   = $user   if ($user);
  $self->{'db_conf'}->{'-pass'}   = $pass   if ($pass);

  unless (defined($self->{'db_conf'}->{'-host'})
      and defined($self->{'db_conf'}->{'-user'})
      and defined($self->{'db_conf'}->{'-dbname'}))
  {
    print "\nERROR : must specify host, user, and database to connect\n\n";
    usage();
  }

  # connect to database specified
  $DBA = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(%{ $self->{'db_conf'} });
  $url = $DBA->url();
}

# Fail with a clear message instead of an "undefined value" error further down.
die "ERROR : could not obtain a connection to the hive database\n" unless ($DBA);

#$DBA->dbc->disconnect_when_inactive(1);

my $queen = $DBA->get_Queen();

job_creation($self);

exit(0);
#######################
#
# subroutines
#
#######################
# Print the command-line help on STDOUT and terminate the script with exit
# status 1.  Only options that the script actually parses and acts upon are
# listed.
sub usage {
  print <<'END_USAGE';
cmd_hive.pl [options]
  -help                      : print this help
  -url <url string>          : url defining where hive database is located
  -dbhost <host>             : database host (when -url is not given)
  -dbport <port>             : database port (default 3306)
  -dbuser <user>             : database user (default ensro)
  -dbpass <password>         : database password
  -dbname <name>             : database name
  -logic_name <name>         : logic name of the analysis (default cmd_hive_analysis)
  -input_id <cmd string>     : command to be executed
  -inputfile <file>          : one job per line of <file>; the line replaces $inputfile
  -suffix_a <tag>            : suffix from here
  -suffix_b <tag>            : suffix to here
  -hashed_a <nn:nn:...>      : start of hashed range; levels replace $h1, $h2, ...
  -hashed_b <nn:nn:...>      : end of hashed range
  -tag <tag>                 : fixed tag in the command line
  -batch_size <num>          : batch size of the analysis (default 1)
  -hive_capacity <num>       : hive capacity of the analysis (default 20)
  -debug <value>             : do not create the jobs
cmd_hive.pl v1.0
END_USAGE

  exit(1);
}
# Store a SystemCmd analysis in the hive database (through the file-level
# $DBA adaptor) and insert its jobs.
#
# The jobs are derived from $self->{'input_id'} in one of three ways, tried in
# this order:
#   * hashed_a + hashed_b : one job per value returned by resolve_suffix()
#   * inputfile           : one job per line of the file, the line replacing
#                           every '$inputfile' in the command
#   * suffix_a + suffix_b : one job per element of the range suffix_a..suffix_b,
#                           replacing '$suffix' and '$tag' in the command
# If none applies only the analysis is stored.  A summary with the number of
# jobs and the insertion speed is printed at the end.
sub job_creation {
  my $self = shift;

  my $logic_name = $self->{'logic_name'} || "cmd_hive_analysis";
  print("creating analysis '$logic_name'\n");

  $self->{_analysis} = Bio::EnsEMBL::Analysis->new(
    -db         => '',
    -db_file    => '',
    -db_version => '1',
    -parameters => "",
    -logic_name => $logic_name,
    -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
  );
  $DBA->get_AnalysisAdaptor()->store($self->{_analysis});

  my $stats = $self->{_analysis}->stats;
  $stats->batch_size($self->{'batch_size'} || 1);
  $stats->hive_capacity($self->{'hive_capacity'} || 20);
  $stats->status('READY');
  $stats->update();

  print("$0 -- inserting jobs\n");
  my $starttime = time();
  my $count     = 0;

  if (defined($self->{'hashed_a'}) and defined($self->{'hashed_b'})) {
    # resolve_suffix() returns undef once the range is exhausted; test for
    # definedness so that a false-but-valid command string cannot end the loop.
    while (defined($self->{resolved_input_id} = $self->resolve_suffix())) {
      print STDERR "  ", $self->{resolved_input_id}, "\n" if ($self->{debug});
      $self->create_resolved_input_id_job() unless ($self->{debug});
      if (++$count % 100 == 0) {
        print "", $self->{resolved_input_id}, " at ", (time() - $starttime), " secs\n";
      }
    }
  }
  elsif (defined($self->{'inputfile'})) {
    my $path = $self->{'inputfile'};
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode and the handle cannot clash with other code.
    open(my $fh, '<', $path) or die "cannot open input file '$path': $!";
    while (my $id = <$fh>) {
      chomp $id;
      my $input_id = $self->{'input_id'};
      # Replace every occurrence, as is done for $suffix and $tag below.
      $input_id =~ s/\$inputfile/$id/g;
      $self->{resolved_input_id} = $input_id;
      $self->create_resolved_input_id_job();
      print "Job ", $count, " at ", (time() - $starttime), " secs\n" if (++$count % 50 == 0);
    }
    close $fh;
  }
  elsif (defined($self->{'suffix_a'}) and defined($self->{'suffix_b'})) {
    my $tag = $self->{'tag'};
    for my $suffix ($self->{'suffix_a'} .. $self->{'suffix_b'}) {
      # expanding tags here:
      $self->{resolved_input_id} = $self->{'input_id'};
      $self->{resolved_input_id} =~ s/\$suffix/$suffix/g;
      $self->{resolved_input_id} =~ s/\$tag/$tag/g;
      print "", $self->{resolved_input_id}, " at ", (time() - $starttime), " secs\n";
      $self->create_resolved_input_id_job();
      # Count the job so that the summary below is correct for this mode too.
      $count++;
    }
  }

  my $total_time = (time() - $starttime);
  print "$count jobs created in $total_time secs\n";
  # The elapsed time can be zero (no jobs, or a coarse clock); avoid dividing by it.
  my $speed = ($total_time > 0) ? ($count / $total_time) : 'n/a';
  print("speed : ", $speed, " jobs/sec\n");
}
# Create one job for the analysis kept in $self->{_analysis}, with
# $self->{resolved_input_id} as its input id.  Nothing is created when
# $self->{'debug'} is set.
sub create_resolved_input_id_job {
  my ($self) = @_;

  my @job_args = (
    -input_id     => $self->{resolved_input_id},
    -analysis     => $self->{_analysis},
    -input_job_id => 0,
  );

  return $self->{'debug'}
    || Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(@job_args);
}
# Return the next command of a "hashed" range, or undef when the range is
# exhausted.
#
# $self->{'hashed_a'} and $self->{'hashed_b'} are colon-separated lists of
# numbers (e.g. 00:00:00 and 01:61:67), one number per level.  The first call
# initialises the per-level state ($self->{_aN}, {_bN} and the counter {_hN})
# from them.  Every later call advances the counter by one, the last level
# being the fastest and each level wrapping from 99 to 00, until all levels
# equal hashed_b.
#
# The returned string is $self->{'input_id'} with every '$hN' replaced by the
# two-digit value of level N.  It is also remembered in
# $self->{'hashed_input_id'}: when a call yields the same string as the
# previous one the range is considered finished and undef is returned.
sub resolve_suffix {
  my $self = shift;

  my @h_a = split(/:/, $self->{'hashed_a'});
  my @h_b = split(/:/, $self->{'hashed_b'});
  my $levels = $self->{_hlevels} = scalar(@h_a);

  if (!defined($self->{"_a$levels"}) || !defined($self->{"_b$levels"})) {
    # First call: set up the per-level boundaries and the running counter.
    warn("wrong hashed options: -hashed_a has $levels level(s) but -hashed_b has "
        . scalar(@h_b) . "\n")
      if ($levels != scalar(@h_b));
    foreach my $level (1 .. $levels) {
      my $index = $level - 1;
      $self->{"_a$level"} = sprintf("%02d", $h_a[$index]) if (!defined($self->{"_a$level"}));
      $self->{"_b$level"} = sprintf("%02d", $h_b[$index]) if (!defined($self->{"_b$level"}));
      $self->{"_h$level"} = sprintf("%02d", $h_a[$index]) if (!defined($self->{"_h$level"}));
    }
  }
  else {
    # The counter only moves while at least one level differs from its end value.
    my $pending = grep { $self->{"_h$_"} != $self->{"_b$_"} } (1 .. $levels);
    if ($pending) {
      # Increment like an odometer, starting at the last level.
      for (my $level = $levels; $level > 0; $level--) {
        if ($self->{"_h$level"} < 99) {
          $self->{"_h$level"}++;
          last;
        }
        # This level is full: reset it and carry into the previous one.
        $self->{"_h$level"} = '00';
      }
    }
  }

  my $hashed_input_id = $self->{'input_id'};
  foreach my $level (1 .. $levels) {
    my $value = sprintf("%02d", $self->{"_h$level"});
    # The look-ahead keeps '$h1' from matching the start of '$h10', '$h11', ...
    $hashed_input_id =~ s/\$h${level}(?!\d)/$value/g;
  }

  my $previous = $self->{'hashed_input_id'};
  if (defined($previous) && $previous eq $hashed_input_id) {
    # we are at the last one, so return undef
    return undef;
  }

  $self->{'hashed_input_id'} = $hashed_input_id;
  return $hashed_input_id;
}
1;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment