print"\npercent_gc_calc.pl – run on core databases for new species, or where sequence or assembly have changed (db names will be stored in file ./percent_gc_data.txt, to submit run submit_density_features.pl -submit percent_gc): \n";
print"\npercent_gc_calc.pl – run on core databases for new species, or where sequence or assembly have changed (db names will be stored in file $outdir/percent_gc_data.txt, to submit run submit_density_features.pl -submit percent_gc): \n";
open(DATAFILE,">$file_path")ordie("Failed to open file $file_path for writing\n");
open(DATAFILE,">$file_path")ordie("Failed to open file $file_path for writing\n");
foreachmy$dbname_host(@dbnames_hosts){
foreachmy$dbname_host(@dbnames_hosts){
...
@@ -172,12 +181,12 @@ select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using
...
@@ -172,12 +181,12 @@ select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using
}
}
closeDATAFILE;
closeDATAFILE;
print"\n\nrepeat_coverage_calc.pl – run on core databases for new species, or where sequence, assembly or repeats have changed (db names will be stored in file ./repeat_coverage_data.txt, to submit run submit_density_features.pl -submit repeat_coverage): \n";
print"\n\nrepeat_coverage_calc.pl – run on core databases for new species, or where sequence, assembly or repeats have changed (db names will be stored in file $outdir/repeat_coverage_data.txt, to submit run submit_density_features.pl -submit repeat_coverage): \n";
print"\n\nseq_region_stats.pl (gene stats option only) - run on all core databases (use the commands below or script submit_density_features.pl -submit seq_region_stats_gene)\n";
print"\n\nseq_region_stats.pl (gene stats option only) - run on all core databases (use the commands below or script submit_density_features.pl -submit seq_region_stats_gene)\n";
print"\n\n3. Density features scripts which can be run when Variation dbs are handed over:\n";
print"\n\n3. Density features scripts which can be run when Variation dbs are handed over:\n";
print"\nvariation_density.pl - run for new species or where the core assembly has changed, or if there are any changes to variation positions in the variation database (species will be stored in file ./variation_density_data.txt, to submit run submit_density_features.pl -submit variation_density):\n";
print"\nvariation_density.pl - run for new species or where the core assembly has changed, or if there are any changes to variation positions in the variation database (species will be stored in file $outdir/variation_density_data.txt, to submit run submit_density_features.pl -submit variation_density):\n";
#get species for new dbs or changed assembly or where variation positions have changed
#get species for new dbs or changed assembly or where variation positions have changed
@core_with_variation=map{$_->[0]}@{$prod_dbh->selectall_arrayref("select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using (db_id) where db_release = $current_release and db_type = 'core' and species_id in (select distinct species_id from db where db_release = $current_release and db_type = 'variation');")};
@core_with_variation=map{$_->[0]}@{$prod_dbh->selectall_arrayref("select distinct concat(full_db_name,'|',db_host) from db_list dl join db d using (db_id) where db_release = $current_release and db_type = 'core' and species_id in (select distinct species_id from db where db_release = $current_release and db_type = 'variation');")};
open(DATAFILE,">$file_path")ordie("Failed to open file $file_path for writing\n");
open(DATAFILE,">$file_path")ordie("Failed to open file $file_path for writing\n");
foreachmy$dbname_host(@dbnames_hosts){
foreachmy$dbname_host(@dbnames_hosts){
...
@@ -246,9 +255,9 @@ if ($response == 3) {
...
@@ -246,9 +255,9 @@ if ($response == 3) {
}
}
closeDATAFILE;
closeDATAFILE;
print"\n\nseq_region_stats.pl (snp stats option only) - run on core databases for new species or if the assembly changed, or if the variation positions have changed in the corresponding variation db (db names will be stored in file ./seq_region_stats_snp_data.txt, to submit run submit_density_features.pl -submit seq_region_stats_snp):\n";
print"\n\nseq_region_stats.pl (snp stats option only) - run on core databases for new species or if the assembly changed, or if the variation positions have changed in the corresponding variation db (db names will be stored in file $outdir/seq_region_stats_snp_data.txt, to submit run submit_density_features.pl -submit seq_region_stats_snp):\n";
push(@print_message,"Submitting gene GC calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_genegc.out\n");
push(@print_message,"Submitting gene GC calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_genegc.out\n");
push(@print_message,"Submitting gene density calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_gene.out\n");
push(@print_message,"Submitting gene density calculation for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_gene.out\n");
push(@print_message,"Submitting seq region gene stats for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_seqreg_gene.out\n");
push(@print_message,"Submitting seq region gene stats for host ".$host[$i]." to queue 'normal'. The output from this job goes to the file ".$outdir."/core_dbs_".$current_release."_".$host[$i]."_seqreg_gene.out\n");
push(@print_message,"Submitting ".$script_title." for ".$db_name." on host ".$host_name." to queue '".$queue."'. The output from this job goes to the file ".$outdir."/".$db_name.$file_name_end.".out\n");
push(@print_message,"Submitting ".$script_title." for ".$db_name." on host ".$host_name." to queue '".$queue."'. The output from this job goes to the file ".$outdir."/".$db_name.$file_name_end.".out\n");
}
}
else{
else{
...
@@ -371,8 +380,8 @@ $prod_dbh->disconnect;
...
@@ -371,8 +380,8 @@ $prod_dbh->disconnect;
foreachmy$cmd(@cmd){
foreachmy$cmd(@cmd){
print$print_message[$cmd_count];
print$print_message[$cmd_count];
#for testing
#for testing
#print "\n\n". $cmd . "\n\n";
print"\n\n".$cmd."\n\n";
system($cmd);
#system($cmd);
$cmd_count++;
$cmd_count++;
}
}
}
}
...
@@ -407,19 +416,20 @@ Usage:
...
@@ -407,19 +416,20 @@ Usage:
gene_gc - the script will run on all core databases
gene_gc - the script will run on all core databases
percent_gc - the script will run on dbs listed in ./percent_gc_data.txt
percent_gc - the script will run on dbs listed in [outdir]/percent_gc_data.txt
repeat_coverage - the script will run on dbs listed in ./repeat_coverage_data.txt
repeat_coverage - the script will run on dbs listed in [outdir]/repeat_coverage_data.txt
gene_density - the script will run on all core databases
gene_density - the script will run on all core databases
seq_region_stats_gene - the script will run on all core databases
seq_region_stats_gene - the script will run on all core databases
variation_density - the script will run for species listed in ./variation_density_data.txt
variation_density - the script will run for species listed in [outdir]/variation_density_data.txt
seq_region_stats_snp - the script will run on dbs listed in ./seq_region_stats_snp_data.txt
seq_region_stats_snp - the script will run on dbs listed in [outdir]/seq_region_stats_snp_data.txt
-o|outdir Output path for farm job commands (current path if not specified)
-o|outdir Path for farm job output and error files as well as data files generated by the script using
option -g(-getdbs) (current path if not specified)