print"\npercent_gc_calc.pl – run on core databases for new species, or where sequence or assembly have changed (db names will be stored in file $outdir/percent_gc_data.txt, to submit run submit_density_features.pl -submit percent_gc): \n";
print "\npercent_gc_calc.pl – run on core databases for new species, or where sequence or assembly have changed (db names will be stored in file $outdir/percent_gc_data.txt, to submit run submit_density_features.pl -submit percent_gc -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx): \n";
# Use a hash as a set: every element of @new_sp_assem and @chg_seq becomes a key,
# so an element that appears in both lists is stored only once (its value is a count).
my %array_union = ();
foreach my $element (@new_sp_assem, @chg_seq) { $array_union{$element}++ }
...
...
@@ -181,7 +211,7 @@ select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using
}
close DATAFILE;
print"\n\nrepeat_coverage_calc.pl – run on core databases for new species, or where sequence, assembly or repeats have changed (db names will be stored in file $outdir/repeat_coverage_data.txt, to submit run submit_density_features.pl -submit repeat_coverage): \n";
print "\n\nrepeat_coverage_calc.pl – run on core databases for new species, or where sequence, assembly or repeats have changed (db names will be stored in file $outdir/repeat_coverage_data.txt, to submit run submit_density_features.pl -submit repeat_coverage -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx): \n";
# Add every element of @chg_repeats to %array_union (its keys act as a set), then
# store the de-duplicated keys, in default string sort order, in @dbnames_hosts.
foreach my $element (@chg_repeats) { $array_union{$element}++ }
@dbnames_hosts = sort(keys %array_union);
...
...
@@ -202,15 +232,15 @@ select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using
if ($response >= 2) {
print "\n\n2. Density features scripts which can be run when Compara homologies are handed over and core xref projections are complete:\n\n";
print"gene_density_calc.pl - run on all core dbs (use the commands below or script submit_density_features.pl -submit gene_density)\n";
print "gene_density_calc.pl - run on all core dbs (use the commands below or script submit_density_features.pl -submit gene_density -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx)\n";
print"\n\nseq_region_stats.pl (gene stats option only) - run on all core databases (use the commands below or script submit_density_features.pl -submit seq_region_stats_gene)\n";
print "\n\nseq_region_stats.pl (gene stats option only) - run on all core databases (use the commands below or script submit_density_features.pl -submit seq_region_stats_gene -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx)\n";
print "\n\n3. Density features scripts which can be run when Variation dbs are handed over:\n";
print"\nvariation_density.pl - run for new species or where the core assembly has changed, or if there are any changes to variation positions in the variation database (species will be stored in file $outdir/variation_density_data.txt, to submit run submit_density_features.pl -submit variation_density):\n";
print "\nvariation_density.pl - run for new species or where the core assembly has changed, or if there are any changes to variation positions in the variation database (species will be stored in file $outdir/variation_density_data.txt, to submit run submit_density_features.pl -submit variation_density -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx):\n";
#get species for new dbs or changed assembly or where variation positions have changed
# The query returns one "full_db_name|db_host" string per core database of release
# $current_release whose species also has a variation database in that release.
# selectall_arrayref yields an array ref of rows; map extracts the single column of each row.
@core_with_variation = map { $_->[0] } @{ $prod_dbh->selectall_arrayref("select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using (db_id) where db_release = $current_release and db_type = 'core' and species_id in (select distinct species_id from db where db_release = $current_release and db_type = 'variation');") };
...
...
@@ -255,7 +285,7 @@ if ($response == 3) {
}
close DATAFILE;
print"\n\nseq_region_stats.pl (snp stats option only) - run on core databases for new species or if the assembly changed, or if the variation positions have changed in the corresponding variation db (db names will be stored in file $outdir/seq_region_stats_snp_data.txt, to submit run submit_density_features.pl -submit seq_region_stats_snp):\n";
print "\n\nseq_region_stats.pl (snp stats option only) - run on core databases for new species or if the assembly changed, or if the variation positions have changed in the corresponding variation db (db names will be stored in file $outdir/seq_region_stats_snp_data.txt, to submit run submit_density_features.pl -submit seq_region_stats_snp -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx):\n";
my $file_path = "$outdir/seq_region_stats_snp_data.txt";
push(@print_message,"Submitting gene GC calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_genegc.out\n");
push(@print_message,"Submitting gene GC calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file $outdir/core_dbs_$current_release"."_".$host[$i]."_genegc.out\n");
push(@print_message,"Submitting gene density calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_gene.out\n");
push(@print_message,"Submitting seq region gene stats for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_seqreg_gene.out\n");
push(@print_message,"Submitting ".$script_title." for ".$db_name." on host ".$host_name." to queue '".$queue."'. The output from this job goes to the file ".$outdir."/".$db_name.$file_name_end.".out\n");
push(@print_message,"Submitting ".$script_title." for ".$db_name ." on host ".$host_name." to queue '".$queue."'. The output from this job goes to the file $outdir/$db_name$file_name_end.out\n");
}
else {
$error = 1;
...
...
@@ -391,22 +421,38 @@ sub usage {
my $indent = ' ' x length($0);
print <<EOF; exit(0);
The script lists databases/species which should have density features updated at the specified stage in the release cycle. There's an option for submitting a selected script.
Options -h -u -p are mandatory and need to be specified for at least one host. When using more than
one host it\'s possible to leave out the user name and password for the second host and they will
be copied from the first host: e.g. -h ens-staging1 -h ens-staging2 -u ensadmin -p xxxx (user ensadmin
and password xxxx will be used for both ens-staging1 and ens-staging2).
The script lists databases/species which should have density features updated at the specified stage in the release cycle when using option -g, e.g.