Adding a run_all flag as otherwise this gets nasty for pre-species

c211c494 · Andy Yates · f990cd08 · c211c494 · c211c494 · c211c494
Commit c211c494 authored 12 years ago by Andy Yates
--- a/docs/pipelines/fasta.html
+++ b/docs/pipelines/fasta.html
--- a/docs/pipelines/fasta.textile
+++ b/docs/pipelines/fasta.textile
@@ -122,6 +122,7 @@ bc. -species human -species cele -species yeast
 |@-pipeline_db -host=@|String|No|Specify a host for the hive database e.g. @-pipeline_db -host=myserver.mysql@|See hive generic config|*YES*|
 |@-pipeline_db -dbname=@|String|No|Specify a different database to use as the hive DB e.g. @-pipeline_db -dbname=my_dumps_test@|Uses pipeline name by default|*NO*|
 |@-ftp_dir@|String|No|Location of the current FTP directory with the previous release's files. We will use this to copy DNA files from one release to another. If not given we do not do any reuse|-|*NO*|
+|@-run_all@|Boolean|No|Ignores any kind of reuse and forces the dump of all DNAs|-|*NO*|
 |@-species@|String|Yes|Specify one or more species to process. Pipeline will only _consider_ these species. Use *-force_species* if you want to force a species run|-|*NO*|
 |@-force_species@|String|Yes|Specify one or more species to force through the pipeline. This is useful to force a dump when you know reuse will not do the _"right thing"_|-|*NO*|
 |@-dump_types@|String|Yes|Specify each type of dump you want to produce. Supported values are *dna*, *cdna* and *ncrna*|All|*NO*|
@@ -172,6 +173,14 @@ bc.
 	-pipeline_db -host=my-db-host -species celegans -force_species celegans \
 	-base_path /path/to/dumps -registry reg.pm

+h3. Running everything:
+
+bc.
+	 init_pipeline.pl Bio::EnsEMBL::Pipeline::PipeConfig::FASTA_conf \
+	 -pipeline_db -host=my-db-host -run_all 1 \
+	 -base_path /path/to/dumps -registry reg.pm
+
+
 h3. Dumping just gene data (no DNA or ncRNA):

 bc. 

--- a/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm
+++ b/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm
@@ -39,6 +39,8 @@ Allowed parameters are:
 =item force_species - Specify species we want to redump even though 
                      our queries of production could say otherwise

+=item run_all - Do not check a thing. Override and run every dump
+
 =back

 The registry should also have a DBAdaptor for the production schema 
@@ -70,6 +72,7 @@ sub param_defaults {
    %{$self->SUPER::param_defaults()},
    
    force_species => [],
+    force_all_species => 1,
  };
  return $p;
 }
@@ -150,6 +153,7 @@ SQL

 sub force_run {
  my ($self, $dba) = @_;
+  return 1 if $self->param('run_all');
  my $new = Bio::EnsEMBL::Registry->get_alias($dba->species());
  return ($self->param('force_species_lookup')->{$new}) ? 1 : 0;
 }

--- a/modules/Bio/EnsEMBL/Pipeline/PipeConfig/FASTA_conf.pm
+++ b/modules/Bio/EnsEMBL/Pipeline/PipeConfig/FASTA_conf.pm
@@ -22,22 +22,34 @@ sub default_options {
        ### Optional overrides        
        ftp_dir => '',

+        # Species to run; use this to restrict to subset of species but be aware you
+        # are still open to the reuse checks. If you do not want this then use
+        # force_species
        species => [],
        
+        # The types to emit
        dump_types => [],
        
+        # The databases to emit (defaults to core)
        db_types => [],
        
+        # Specify species you really need to get running
        force_species => [],
        
+        # Only process these logic names
        process_logic_names => [],
        
+        # As above but switched around. Do not process these names
        skip_logic_names => [],
        
+        # The release of the data
        release => software_version(),
        
+        # The previous release; override if running on something different
        previous_release => (software_version() - 1),
        
+        force_all_species => 0, #always run every species
+        
        ### SCP code
        
        blast_servers => [],
@@ -53,6 +65,7 @@ sub default_options {
        pipeline_name => 'fasta_dump_'.$self->o('release'),
        
        wublast_exe => 'xdformat',
+        ncbiblast_exe => 'makeblastdb',
        blat_exe => 'faToTwoBit',
        port_offset => 30000,
        
@@ -82,6 +95,7 @@ sub pipeline_analyses {
          sequence_type_list => $self->o('dump_types'),
          ftp_dir => $self->o('ftp_dir'),
          force_species => $self->o('force_species'),
+          run_all => $self->o('run_all'),
        },
        -input_ids  => [ {} ],
        -flow_into  => {