use strict;
use warnings;

use Data::Dumper;
use Bio::EnsEMBL::ApiVersion qw/software_version/;

# Data::Dumper output settings: double-quoted strings (Useqq), no "$VAR1 ="
# prefix (Terse) and no newlines or indentation (Indent = 0), so a dumped
# structure comes out as one compact line of Perl source.
$Data::Dumper::Useqq  = 1;
$Data::Dumper::Terse  = 1;
$Data::Dumper::Indent = 0;
# Submits the display name and GO term projections as farm jobs
# Remember to check/set the various config options
# ------------------------------ config -------------------------------

# Release number, as reported by software_version().
my $release = software_version();

# Prefix of the per-release output directory. The release number is appended
# to this string as-is, so end it with "/" if a sub-directory is wanted.
my $base_dir = "mydir";

# registry config file, specifies Compara location
my $conf = "release_${release}.ini";

# location of other databases
my @config = (
    {
        '-host'       => 'HOST',
        '-port'       => 'PORT',
        '-user'       => 'USER',
        '-pass'       => 'PASS',
        '-db_version' => $release
    },
    {
        '-host'       => 'HOST',
        '-port'       => 'PORT',
        '-user'       => 'USER',
        '-pass'       => 'PASS',
        '-db_version' => $release
    }
);

# Dumped form of @config; it is handed to the projection script through its
# -registryconf option.
my $registryconf = Dumper(\@config);

# load limit for ens-staging MySQL instance above which jobs won't be started
my $limit = 200;

# -------------------------- end of config ----------------------------
# check that base directory exists
die("Cannot find base directory $base_dir") if (!-e $base_dir);

# create release subdir if necessary
my $dir = $base_dir . $release;
if (!-e $dir) {
    # Abort when the directory cannot be created: the bsub -o/-e files are
    # all written below $dir, so carrying on would lose every job log.
    mkdir $dir or die "Cannot create directory $dir: $!";
    print "Created $dir\n";
}
else {
    print "Cleaning and re-using $dir\n";

    # Remove the .out and .err files already present in the directory.
    unlink <$dir/*.out>, <$dir/*.err>;
}
# common options

# Arguments given to every project_display_xrefs.pl invocation.
my $script_opts = "-conf '$conf' -registryconf '$registryconf' -version '$release' -release '$release' -quiet";

# Arguments given to every bsub invocation: the -M limit and the -R
# resource-requirement string (memory selection and reservation).
my $bsub_opts = "";
$bsub_opts .= "-M2000000 -R'select[mem>2000] rusage[mem=2000]'";

######
# When editing xref projection lists below, remember to check the species is in
# the execution order array that follows.
######

# One-to-one display name projections: source species => list of target
# species.
my %names_1_1;
$names_1_1{'human'} = [qw(
    alpaca
    anolis
    armadillo
    bushbaby
    cat
    chicken
    chimp
    coelacanth
    cow
    dolphin
    elephant
    gibbon
    gorilla
    ground_shrew
    guinea_pig
    horse
    hyrax
    macaque
    marmoset
    megabat
    microbat
    mouse_lemur
    opossum
    orang_utan
    panda
    pig
    pika
    platypus
    psinensis
    sloth
    squirrel
    rabbit
    tarsier
    tasmanian_devil
    tenrec
    tree_shrew
    turkey
    wallaby
    western_european_hedgehog
    xenopus
    zebrafinch
)];
$names_1_1{'mouse'} = [qw(
    kangaroo_rat
    rat
)];
# One-to-many display name projections: source species => list of target
# species.
my %names_1_many = (
    'human' => [
        qw(cod fugu lamprey medaka),
        qw(stickleback tetraodon tilapia zebrafish),
    ],
);
# GO term projections: source species => list of target species.
my %go_terms;
$go_terms{'human'} = [qw(
    alpaca
    anolis
    armadillo
    bushbaby
    cat
    chicken
    chimp
    cow
    dog
    dolphin
    elephant
    gibbon
    gorilla
    ground_shrew
    guinea_pig
    horse
    hyrax
    kangaroo_rat
    macaque
    marmoset
    megabat
    microbat
    mouse
    mouse_lemur
    opossum
    orang_utan
    panda
    pig
    platypus
    psinensis
    sloth
    squirrel
    pika
    rabbit
    rat
    tarsier
    tasmanian_devil
    tenrec
    tree_shrew
    turkey
    wallaby
    western_european_hedgehog
    zebrafinch
)];
$go_terms{'mouse'} = [qw(
    alpaca
    anolis
    armadillo
    bushbaby
    cat
    chicken
    chimp
    cow
    dog
    dolphin
    elephant
    gorilla
    ground_shrew
    guinea_pig
    horse
    hyrax
    human
    kangaroo_rat
    macaque
    marmoset
    megabat
    microbat
    mouse_lemur
    opossum
    orang_utan
    panda
    pig
    pika
    platypus
    psinensis
    rabbit
    rat
    sloth
    squirrel
    tarsier
    tasmanian_devil
    tenrec
    tree_shrew
    turkey
    wallaby
    western_european_hedgehog
    zebrafinch
)];
$go_terms{'rat'} = [qw(
    human
    mouse
)];
$go_terms{'zebrafish'} = [qw(
    cod
    coelacanth
    fugu
    lamprey
    stickleback
    tetraodon
    tilapia
    xenopus
)];
$go_terms{'xenopus'} = [qw(zebrafish)];

# order to run projections in, just in case they are order-sensitive.
my @execution_order = qw/human mouse rat zebrafish xenopus/;
# except of course order is irrelevant to the job queue. Consider provisional for when
# someone desires jobs that wait for others to finish.

# ----------------------------------------
# Step 1: remove the existing projected names from every one-to-one target
# species. These commands run directly (not through bsub), one at a time.
print "Deleting projected names (one to one)\n";
foreach my $species (keys %names_1_1) {
    foreach my $to (@{ $names_1_1{$species} }) {
        my $cmd = "perl project_display_xrefs.pl $script_opts -to $to -delete_names -delete_only\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}

# Step 2: submit one bsub job per (source, target) pair, walking the source
# species in @execution_order and skipping those with no one-to-one targets.
foreach my $from (@execution_order) {
    if (not exists($names_1_1{$from})) { next; }
    foreach my $to (@{ $names_1_1{$from} }) {
        my $o = "$dir/names_${from}_$to.out";
        my $e = "$dir/names_${from}_$to.err";
        my $n = substr("n_${from}_$to", 0, 10);    # job name display limited to 10 chars
        my $all = ($from eq "human") ? "" : "--all_sources";    # non-human from species -> use all sources
        print "Submitting name projection from $from to $to\n";
        my $cmd = "bsub $bsub_opts -o $o -e $e -J $n perl project_display_xrefs.pl $script_opts -from $from -to $to -names -no_database $all\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}
# Step 1: remove the existing projected names from every one-to-many target
# species. These commands run directly (not through bsub), one at a time.
print "Deleting projected names (one to many)\n";
foreach my $from (keys %names_1_many) {
    foreach my $to (@{ $names_1_many{$from} }) {
        my $cmd = "perl project_display_xrefs.pl $script_opts -to $to -delete_names -delete_only\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}

# Step 2: submit one bsub job per (source, target) pair, walking the source
# species in @execution_order and skipping those with no one-to-many targets.
foreach my $from (@execution_order) {
    if (not exists($names_1_many{$from})) { next; }
    foreach my $to (@{ $names_1_many{$from} }) {
        my $o = "$dir/names_${from}_$to.out";
        my $e = "$dir/names_${from}_$to.err";
        my $n = substr("n_${from}_$to", 0, 10);    # job name is cut to 10 characters
        print "Submitting name projection from $from to $to (1:many)\n";
        my $cmd = "bsub $bsub_opts -o $o -e $e -J $n perl project_display_xrefs.pl $script_opts -from $from -to $to -names -no_database -one_to_many\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}

# ----------------------------------------
# Step 1: remove the existing projected GO terms from every target species.
# These commands run directly (not through bsub), one at a time.
print "Deleting projected GO terms\n";
foreach my $from (keys %go_terms) {
    foreach my $to (@{ $go_terms{$from} }) {
        my $cmd = "perl project_display_xrefs.pl $script_opts -to $to -delete_go_terms -delete_only\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}

# Step 2: submit one bsub job per (source, target) pair to the "long" queue,
# walking the source species in @execution_order and skipping those with no
# GO term targets.
foreach my $from (@execution_order) {
    if (not exists($go_terms{$from})) { next; }
    foreach my $to (@{ $go_terms{$from} }) {
        my $o = "$dir/go_${from}_$to.out";
        my $e = "$dir/go_${from}_$to.err";
        my $n = substr("g_${from}_$to", 0, 10);    # job name is cut to 10 characters
        print "Submitting GO term projection from $from to $to\n";
        my $cmd = "bsub $bsub_opts -q long -o $o -e $e -J $n perl project_display_xrefs.pl $script_opts -from $from -to $to -go_terms\n";
        system($cmd) == 0
            or warn "Command failed (status $?): $cmd";
    }
}

# ----------------------------------------