From cc6a319ae402bfc7a9f64e3d3e5400f759cb7204 Mon Sep 17 00:00:00 2001
From: Leo Gordon <lg4@ebi.ac.uk>
Date: Mon, 9 May 2011 15:34:07 +0000
Subject: [PATCH] EXPERIMENTAL sqlite support in eHive

---
 .../Bio/EnsEMBL/Hive/DBSQL/AnalysisAdaptor.pm |   2 +-
 .../EnsEMBL/Hive/DBSQL/AnalysisDataAdaptor.pm |   2 +-
 .../EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm  |  53 +--
 .../Hive/DBSQL/AnalysisStatsAdaptor.pm        |  15 +-
 modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm |  29 +-
 modules/Bio/EnsEMBL/Hive/Extensions.pm        |  28 +-
 modules/Bio/EnsEMBL/Hive/NakedTable.pm        |   2 +-
 .../Hive/PipeConfig/HiveGeneric_conf.pm       |  71 ++-
 .../EnsEMBL/Hive/PipeConfig/LongMult_conf.pm  |  14 +-
 modules/Bio/EnsEMBL/Hive/Queen.pm             |  20 +-
 .../Bio/EnsEMBL/Hive/RunnableDB/JobFactory.pm |   4 +-
 modules/Bio/EnsEMBL/Hive/RunnableDB/SqlCmd.pm |   5 +-
 modules/Bio/EnsEMBL/Hive/URLFactory.pm        |  32 +-
 sql/tables.sqlite                             | 408 ++++++++++++++++++
 14 files changed, 594 insertions(+), 91 deletions(-)
 create mode 100644 sql/tables.sqlite

diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisAdaptor.pm
index 97057685b..33d5617b6 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisAdaptor.pm
@@ -36,7 +36,7 @@ sub fetch_by_logic_name_or_url {
     my $self              = shift @_;
     my $logic_name_or_url = shift @_;
 
-    if($logic_name_or_url =~ m{^\w+://}) {
+    if($logic_name_or_url =~ m{^\w*://}) {
         return Bio::EnsEMBL::Hive::URLFactory->fetch($logic_name_or_url, $self->db);
     } else {
         return $self->fetch_by_logic_name($logic_name_or_url);
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisDataAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisDataAdaptor.pm
index 3a8f52605..f206ee486 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisDataAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisDataAdaptor.pm
@@ -71,7 +71,7 @@ sub store {
     my $sth2 = $self->prepare("INSERT INTO analysis_data (data) VALUES (?)");
     $sth2->execute($data);
-    $data_id = $sth2->{'mysql_insertid'};
+    $data_id = ($self->dbc->driver eq 'sqlite') ? $self->dbc->db_handle->func('last_insert_rowid') : $sth2->{'mysql_insertid'};
     $sth2->finish;
 
     return $data_id;
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
index c6c5322c7..fc92e91c3 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
@@ -99,16 +99,20 @@ sub CreateNewJob {
       $input_id = "_ext_input_analysis_data_id $input_data_id";
   }
 
-  my $sql = q{INSERT ignore into job
+  my $dbc = $analysis->adaptor->db->dbc;
+  my $insertion_method = ($dbc->driver eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE';
+  $insertion_method = 'INSERT';   # use plain INSERT for now: the insert is expected to succeed, and a collision should raise an error rather than be silently ignored
+
+  my $status = $blocked ? 'BLOCKED' : 'READY';
+
+  my $sql = qq{$insertion_method INTO job
               (input_id, prev_job_id,analysis_id,status,semaphore_count,semaphored_job_id)
               VALUES (?,?,?,?,?,?)};
 
-  my $status = $blocked ? 'BLOCKED' : 'READY';
-
-  my $dbc = $analysis->adaptor->db->dbc;
   my $sth = $dbc->prepare($sql);
-  $sth->execute($input_id, $prev_job_id, $analysis->dbID, $status, $semaphore_count, $semaphored_job_id);
-  my $job_id = $sth->{'mysql_insertid'};
+
+  $sth->execute($input_id, $prev_job_id, $analysis->dbID, $status, $semaphore_count || 0, $semaphored_job_id);
+  my $job_id = ($dbc->driver eq 'sqlite') ? $dbc->db_handle->func('last_insert_rowid') : $sth->{'mysql_insertid'};
   $sth->finish;
 
   $dbc->do("UPDATE analysis_stats SET ".
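Two SQLite accommodations introduced above recur throughout the rest of the patch: the driver-dependent last-insert-id lookup (DBD::mysql exposes it as the statement-handle attribute `mysql_insertid`, while DBD::SQLite exposes a `last_insert_rowid` function), and the rewriting of MySQL-only SQL — `INSERT IGNORE` becomes `INSERT OR IGNORE`, and `UPDATE ... LIMIT`, which SQLite lacks, is emulated in grab_jobs_for_worker() below with a `job_id IN (SELECT ... LIMIT n)` subquery. Here is a minimal sketch of the first pattern factored into one place; the helper name is hypothetical (the patch inlines the equivalent ternary at each call site), and DBI's portable `$dbh->last_insert_id()` would be an alternative worth considering:

```perl
# Hypothetical helper (not part of the patch) showing the driver dispatch that
# AnalysisDataAdaptor, AnalysisJobAdaptor, BaseAdaptor, Queen and SqlCmd inline:
sub _last_insert_id {
    my ($dbc, $sth) = @_;   # $dbc: an EnsEMBL DBConnection; $sth: the just-executed INSERT handle

    return ($dbc->driver eq 'sqlite')
        ? $dbc->db_handle->func('last_insert_rowid')   # DBD::SQLite-specific function call
        : $sth->{'mysql_insertid'};                    # DBD::mysql statement-handle attribute
}
```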
@@ -399,11 +403,11 @@ sub update_status {
     my $sql = "UPDATE job SET status='".$job->status."' ";
 
     if($job->status eq 'DONE') {
-        $sql .= ",completed=now()";
+        $sql .= ",completed=CURRENT_TIMESTAMP";
         $sql .= ",runtime_msec=".$job->runtime_msec;
         $sql .= ",query_count=".$job->query_count;
     } elsif($job->status eq 'PASSED_ON') {
-        $sql .= ", completed=now()";
+        $sql .= ", completed=CURRENT_TIMESTAMP";
     } elsif($job->status eq 'READY') {
     }
 
@@ -450,7 +454,9 @@ sub store_out_files {
     $self->dbc->do($sql);
     return unless($job->stdout_file or $job->stderr_file);
 
-    $sql = "INSERT ignore INTO job_file (job_id, worker_id, retry, type, path) VALUES ";
+    my $insertion_method = ($self->dbc->driver eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE';
+
+    $sql = "$insertion_method INTO job_file (job_id, worker_id, retry, type, path) VALUES ";
     if($job->stdout_file) {
         $sql .= sprintf("(%d,%d,%d,'STDOUT','%s')", $job->dbID, $job->worker_id, $job->retry_count, $job->stdout_file);
@@ -485,23 +491,22 @@ sub grab_jobs_for_worker {
 
     my $analysis_id = $worker->analysis->dbID();
     my $worker_id   = $worker->dbID();
+    my $batch_size  = $worker->batch_size();
 
-    my $sql_base = qq{
-        UPDATE job
-        SET worker_id='$worker_id', status='CLAIMED'
-        WHERE analysis_id='$analysis_id' AND status='READY' AND semaphore_count<=0
-    };
+    my $update_sql          = "UPDATE job SET worker_id='$worker_id', status='CLAIMED'";
+    my $selection_start_sql = " WHERE analysis_id='$analysis_id' AND status='READY' AND semaphore_count<=0";
 
-    my $sql_virgin = $sql_base .
-                     " AND retry_count=0".
-                     " LIMIT " . $worker->batch_size;
+    my $virgin_selection_sql = $selection_start_sql . " AND retry_count=0 LIMIT $batch_size";
+    my $any_selection_sql    = $selection_start_sql . " LIMIT $batch_size";
 
-    my $sql_any = $sql_base .
-                  " LIMIT " . $worker->batch_size;
-
-    my $claim_count = $self->dbc->do($sql_virgin);
-    if($claim_count == 0) {
-        $claim_count = $self->dbc->do($sql_any);
+    if($self->dbc->driver eq 'sqlite') {
+        if((my $claim_count = $self->dbc->do( $update_sql . " WHERE job_id IN (SELECT job_id FROM job $virgin_selection_sql) AND status='READY'" )) == 0) {   # NB: test numerically, since do() returns the true value '0E0' when no rows were claimed
+            $claim_count = $self->dbc->do( $update_sql . " WHERE job_id IN (SELECT job_id FROM job $any_selection_sql) AND status='READY'" );
+        }
+    } else {
+        if((my $claim_count = $self->dbc->do( $update_sql . $virgin_selection_sql )) == 0) {
+            $claim_count = $self->dbc->do( $update_sql . $any_selection_sql );
+        }
     }
 
     my $constraint = "j.analysis_id='$analysis_id' AND j.worker_id='$worker_id' AND j.status='CLAIMED'";
@@ -607,7 +612,7 @@ sub release_and_age_job {
 #
     $self->dbc->do( qq{
         UPDATE job
-            SET status=IF( $may_retry AND (retry_count<$max_retry_count), 'READY', 'FAILED'), retry_count=retry_count+1
+            SET status=(CASE WHEN $may_retry AND (retry_count<$max_retry_count) THEN 'READY' ELSE 'FAILED' END), retry_count=retry_count+1
         WHERE job_id=$job_id AND status in ('COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT')
     } );
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisStatsAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisStatsAdaptor.pm
index 9075de724..6ed787904 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisStatsAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisStatsAdaptor.pm
@@ -209,7 +209,7 @@ sub update {
   $sql .= ",failed_job_tolerance=" . $stats->failed_job_tolerance();
   $sql .= ",num_running_workers=" . $stats->num_running_workers();
   $sql .= ",num_required_workers=" .
$stats->num_required_workers(); - $sql .= ",last_update=NOW()"; + $sql .= ",last_update=CURRENT_TIMESTAMP"; $sql .= ",sync_lock='0'"; $sql .= ",rc_id=". $stats->rc_id(); $sql .= ",can_be_empty=". $stats->can_be_empty(); @@ -218,7 +218,7 @@ sub update { my $sth = $self->prepare($sql); $sth->execute(); $sth->finish; - $sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT now(), analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id); + $sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT CURRENT_TIMESTAMP, analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id); $sth->execute(); $sth->finish; $stats->seconds_since_last_update(0); #not exact but good enough :) @@ -285,7 +285,7 @@ sub decrease_hive_capacity my $sql = "UPDATE analysis_stats ". " SET hive_capacity = hive_capacity - 1, ". - " num_required_workers = IF(num_required_workers > 0, num_required_workers - 1, 0) ". + " num_required_workers = (CASE WHEN num_required_workers > 0 THEN num_required_workers - 1 ELSE 0 END) ". " WHERE analysis_id='$analysis_id' and hive_capacity > 1"; $self->dbc->do($sql); @@ -469,7 +469,10 @@ sub _columns { ast.rc_id ast.can_be_empty ); - push @columns , "UNIX_TIMESTAMP()-UNIX_TIMESTAMP(ast.last_update) seconds_since_last_update "; + + push @columns , ($self->dbc->driver eq 'sqlite') + ? "strftime('%s','now')-strftime('%s',ast.last_update) seconds_since_last_update " + : "UNIX_TIMESTAMP()-UNIX_TIMESTAMP(ast.last_update) seconds_since_last_update "; return @columns; } @@ -534,7 +537,9 @@ sub _create_new_for_analysis_id { my $sql; - $sql = "INSERT ignore INTO analysis_stats (analysis_id) VALUES ($analysis_id)"; + my $insertion_method = ($self->dbc->driver eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE'; + + $sql = "$insertion_method INTO analysis_stats (analysis_id) VALUES ($analysis_id)"; #print("$sql\n"); my $sth = $self->prepare($sql); $sth->execute(); diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm index 69d4363ad..e2de2d04f 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm @@ -90,26 +90,37 @@ sub _table_info_loader { my $dbc = $self->dbc(); my $dbname = $dbc->dbname(); + my $driver = $dbc->driver(); my $table_name = $self->table_name(); my %column_set = (); my @primary_key = (); my $autoinc_id = ''; - my $sql = "SELECT column_name,column_key,extra FROM information_schema.columns WHERE table_schema='$dbname' and table_name='$table_name'"; + my $sql = { + 'mysql' => "SELECT column_name AS name, column_key='PRI' AS pk, extra='auto_increment' AS ai FROM information_schema.columns WHERE table_schema='$dbname' and table_name='$table_name'", + 'sqlite'=> "PRAGMA table_info('$table_name')", + }->{$driver} or die "could not find column info for driver='$driver'"; + my $sth = $self->prepare($sql); - $sth->execute; - while(my ($column_name, $column_key, $extra) = $sth->fetchrow ) { + $sth->execute; + while(my $row = $sth->fetchrow_hashref ) { + my $column_name = $row->{'name'}; + $column_set{$column_name} = 1; - if($column_key eq 'PRI') { + if($row->{'pk'}) { push @primary_key, $column_name; - if($extra eq 'auto_increment') { + if($row->{'ai'}) { $autoinc_id = $column_name; } } } $sth->finish; + if(($driver eq 'sqlite') and scalar(@primary_key)==1) { + $autoinc_id = $primary_key[0]; + } + $self->column_set( \%column_set ); $self->primary_key( \@primary_key ); $self->autoinc_id( $autoinc_id ); @@ -275,8 +286,12 @@ sub store { 
     my $table_name = $self->table_name();
     my $column_set = $self->column_set();
     my $autoinc_id = $self->autoinc_id();
+    my $driver     = $self->dbc->driver();
 
     my $insertion_method = $self->insertion_method;  # INSERT, INSERT_IGNORE or REPLACE
     $insertion_method =~ s/_/ /g;
+    if($driver eq 'sqlite') {
+        $insertion_method =~ s/INSERT IGNORE/INSERT OR IGNORE/ig;
+    }
 
         # NB: here we assume all hashes will have the same keys:
     my $non_autoinc_columns = [ grep { $_ ne $autoinc_id } keys %$column_set ];
@@ -289,7 +304,7 @@ sub store {
         if($check_presence_in_db_first and my $present = $self->check_object_present_in_db($object)) {
             $self->mark_stored($object, $present);
         } else {
-            #print "STORE: $sql\n";
+            # print "STORE: $sql\n";
             $sth ||= $self->prepare( $sql );    # only prepare (once) if we get here
 
             #print "NON_AUTOINC_COLUMNS: ".join(', ', @$non_autoinc_columns)."\n";
@@ -300,7 +315,7 @@ sub store {
                 # using $return_code in boolean context allows us to skip the value '0E0' ('no rows affected') that Perl treats as zero but regards as true:
                     or die "Could not perform\n\t$sql\nwith data:\n\t(".join(',', @$non_autoinc_values).')';
             if($return_code > 0) {     # <--- for the same reason we have to be explicitly numeric here
-                $self->mark_stored($object, $sth->{'mysql_insertid'});
+                $self->mark_stored($object, ($driver eq 'sqlite') ? $self->dbc->db_handle->func('last_insert_rowid') : $sth->{'mysql_insertid'});
             }
         }
     }
diff --git a/modules/Bio/EnsEMBL/Hive/Extensions.pm b/modules/Bio/EnsEMBL/Hive/Extensions.pm
index 535c79250..d0f4baefe 100755
--- a/modules/Bio/EnsEMBL/Hive/Extensions.pm
+++ b/modules/Bio/EnsEMBL/Hive/Extensions.pm
@@ -90,17 +90,26 @@ sub Bio::EnsEMBL::Analysis::process {
 
 =cut
 
-sub Bio::EnsEMBL::DBSQL::DBConnection::url
-{
+sub Bio::EnsEMBL::DBSQL::DBConnection::url {
     my $self = shift;
-    return undef unless($self->host and $self->port and $self->dbname);
-    my $url = "mysql://";
+
+    return undef unless($self->driver and $self->dbname);
+
+    my $url = $self->driver . '://';
+
     if($self->username) {
         $url .= $self->username;
         $url .= ":".$self->password if($self->password);
         $url .= "@";
     }
-    $url .= $self->host .":". $self->port ."/" . $self->dbname;
+    if($self->host) {
+        $url .= $self->host;
+        if($self->port) {
+            $url .= ':'.$self->port;
+        }
+    }
+    $url .= '/' . $self->dbname;
+
     return $url;
 }
 
@@ -118,15 +127,12 @@ sub Bio::EnsEMBL::DBSQL::DBConnection::url
 
 =cut
 
-sub Bio::EnsEMBL::Analysis::url
-{
+sub Bio::EnsEMBL::Analysis::url {
     my $self = shift;
-    my $url;
 
     return undef unless($self->adaptor);
-    $url = $self->adaptor->db->dbc->url;
-    $url .= "/analysis?logic_name=" . $self->logic_name;
-    return $url;
+
+    return $self->adaptor->db->dbc->url . '/analysis?logic_name=' . $self->logic_name;
 }
 
diff --git a/modules/Bio/EnsEMBL/Hive/NakedTable.pm b/modules/Bio/EnsEMBL/Hive/NakedTable.pm
index a080b092d..fb40d4df1 100644
--- a/modules/Bio/EnsEMBL/Hive/NakedTable.pm
+++ b/modules/Bio/EnsEMBL/Hive/NakedTable.pm
@@ -69,7 +69,7 @@ sub url {
     my $ref_dba = shift @_;  # if reference dba is the same as 'our' dba, a shorter url can be generated
 
     if(my $adaptor = $self->adaptor) {
-        my $conn_prefix = ($adaptor->db == $ref_dba) ? 'mysql:///' : $adaptor->db->dbc->url();
+        my $conn_prefix = ($adaptor->db == $ref_dba) ? ':///' : $adaptor->db->dbc->url();
         return $conn_prefix .'/'. $self->table_name() . '?insertion_method=' .
$self->insertion_method(); } else { return; diff --git a/modules/Bio/EnsEMBL/Hive/PipeConfig/HiveGeneric_conf.pm b/modules/Bio/EnsEMBL/Hive/PipeConfig/HiveGeneric_conf.pm index 636b9eabb..6ce2aa7a3 100644 --- a/modules/Bio/EnsEMBL/Hive/PipeConfig/HiveGeneric_conf.pm +++ b/modules/Bio/EnsEMBL/Hive/PipeConfig/HiveGeneric_conf.pm @@ -52,8 +52,12 @@ use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor; use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor; use Bio::EnsEMBL::Hive::Extensions; +our ($hive_default_driver) = 'mysql'; + + # ---------------------------[the following methods will be overridden by specific pipelines]------------------------- + =head2 default_options Description : Interface method that should return a hash of option_name->default_option_value pairs. @@ -74,10 +78,12 @@ sub default_options { -user => 'ensadmin', -pass => $self->o('password'), -dbname => $ENV{'USER'}.'_'.$self->o('pipeline_name'), # example of a linked definition (resolved via saturation) + -driver => $hive_default_driver, }, }; } + =head2 pipeline_create_commands Description : Interface method that should return a list of command lines to be run in order to create and set up the pipeline database. @@ -86,17 +92,24 @@ sub default_options { =cut sub pipeline_create_commands { - my ($self) = @_; - return [ - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 0)." -e 'CREATE DATABASE ".$self->o('pipeline_db', '-dbname')."'", + my $self = shift @_; - # standard eHive tables, foreign_keys and procedures: - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 1).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/tables.sql', - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 1).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/foreign_keys.sql', - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 1).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/procedures.sql', - ]; + return ($hive_default_driver eq 'sqlite') + ? [ + # standard eHive tables and unique/non-unique indices: + $self->db_connect_command('pipeline_db').' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/tables.sqlite', + ] + : [ + 'mysql '.$self->dbconn_2_mysql('pipeline_db', 0)." -e 'CREATE DATABASE ".$self->o('pipeline_db', '-dbname')."'", + + # standard eHive tables, foreign_keys and procedures: + $self->db_connect_command('pipeline_db').' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/tables.sql', + $self->db_connect_command('pipeline_db').' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/foreign_keys.sql', + $self->db_connect_command('pipeline_db').' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/procedures.sql', + ]; } + =head2 pipeline_wide_parameters Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs. @@ -112,6 +125,7 @@ sub pipeline_wide_parameters { }; } + =head2 resource_classes Description : Interface method that should return a hash of resource_description_id->resource_description_hash. @@ -127,6 +141,7 @@ sub resource_classes { }; } + =head2 pipeline_analyses Description : Interface method that should return a list of hashes that define analysis bundled with corresponding jobs, dataflow and analysis_ctrl rules and resource_id. @@ -160,6 +175,7 @@ sub new { return $self; } + =head2 o Description : This is the method you call in the interface methods when you need to substitute an option: $self->o('password') . 
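To make the driver branching in pipeline_create_commands() above concrete, here is a standalone sketch (not eHive code; all connection values are illustrative) of the shell command each driver ends up with, mirroring the db_connect_command() helper introduced further down in this patch:

```perl
#!/usr/bin/env perl
use strict;
use warnings;

# Illustrative connection hash in the same shape as the 'pipeline_db' option above:
my %pipeline_db = (
    -driver => 'sqlite',          # or 'mysql'
    -host   => 'compara2',
    -port   => 3306,
    -user   => 'ensadmin',
    -pass   => 'secret',
    -dbname => 'my_pipeline',     # under sqlite this is simply the database file path
);

my $connect_cmd = ($pipeline_db{-driver} eq 'sqlite')
    ? "sqlite3 $pipeline_db{-dbname}"
    : "mysql -h$pipeline_db{-host} -P$pipeline_db{-port} -u$pipeline_db{-user} -p$pipeline_db{-pass} $pipeline_db{-dbname}";

print "$connect_cmd < ensembl-hive/sql/tables.sqlite\n";   # how the schema gets piped in
```

Note that the sqlite branch needs no separate CREATE DATABASE step: sqlite3 creates the database file on first use, which is why pipeline_create_commands() can go straight to loading tables.sqlite.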
@@ -187,6 +203,7 @@ sub o {   # descends the option hash structure (vivifying all enco
     return $value;
 }
 
+
 =head2 dbconn_2_mysql
 
     Description : A convenience method used to stringify a connection-parameters hash into a parameter string that both mysql and beekeeper.pl can understand
@@ -203,6 +220,37 @@ sub dbconn_2_mysql {    # will save you a lot of typing
            .($with_db ? ($self->o($db_conn,'-dbname').' ') : '');
 }
 
+
+=head2 db_connect_command
+
+    Description : A convenience method used to stringify a command to connect to the db OR pipe an sql file into it.
+
+=cut
+
+sub db_connect_command {
+    my ($self, $db_conn) = @_;
+
+    return ($hive_default_driver eq 'sqlite')
+        ? 'sqlite3 '.$self->o($db_conn, '-dbname')
+        : 'mysql '.$self->dbconn_2_mysql($db_conn, 1);
+}
+
+
+=head2 db_execute_command
+
+    Description : A convenience method used to stringify a command to execute a given sql command against the db.
+
+=cut
+
+sub db_execute_command {
+    my ($self, $db_conn, $sql_command) = @_;
+
+    return ($hive_default_driver eq 'sqlite')
+        ? 'sqlite3 '.$self->o($db_conn, '-dbname')." '$sql_command'"
+        : 'mysql '.$self->dbconn_2_mysql($db_conn, 1)." -e '$sql_command'";
+}
+
+
 =head2 dbconn_2_url
 
     Description : A convenience method used to stringify a connection-parameters hash into a 'url' that beekeeper.pl will understand
@@ -212,9 +260,12 @@ sub dbconn_2_mysql {    # will save you a lot of typing
 sub dbconn_2_url {
     my ($self, $db_conn) = @_;
 
-    return 'mysql://'.$self->o($db_conn,'-user').':'.$self->o($db_conn,'-pass').'@'.$self->o($db_conn,'-host').':'.$self->o($db_conn,'-port').'/'.$self->o($db_conn,'-dbname');
+    return ($hive_default_driver eq 'sqlite')
+        ? $self->o($db_conn, '-driver').':///'.$self->o($db_conn,'-dbname')
+        : $self->o($db_conn, '-driver').'://'.$self->o($db_conn,'-user').':'.$self->o($db_conn,'-pass').'@'.$self->o($db_conn,'-host').':'.$self->o($db_conn,'-port').'/'.$self->o($db_conn,'-dbname');
 }
 
+
 =head2 process_options
 
     Description : The method that does all the parameter parsing magic.
@@ -460,7 +511,7 @@ sub run {
         print "  beekeeper.pl -url $url -run\t\t# (run one step of the pipeline - useful for debugging/learning)\n";
 
     print "\n\n\tTo connect to your pipeline database use the following line:\n\n";
-    print "  mysql ".$self->dbconn_2_mysql('pipeline_db',1)."\n\n";
+    print "  ".$self->db_connect_command('pipeline_db')."\n\n";
 }
 
diff --git a/modules/Bio/EnsEMBL/Hive/PipeConfig/LongMult_conf.pm b/modules/Bio/EnsEMBL/Hive/PipeConfig/LongMult_conf.pm
index dc53fb4a1..5ccfa30c3 100644
--- a/modules/Bio/EnsEMBL/Hive/PipeConfig/LongMult_conf.pm
+++ b/modules/Bio/EnsEMBL/Hive/PipeConfig/LongMult_conf.pm
@@ -57,6 +57,11 @@ use warnings;
 
 use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');  # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
 
+
+    # EXPERIMENTAL: choose either 'mysql' or 'sqlite' and do not forget to provide the correct connection parameters:
+$Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf::hive_default_driver = 'mysql';
+
+
 =head2 default_options
 
     Description : Implements default_options() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that is used to initialize default options.
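For reference, the URL shapes that dbconn_2_url() above can now emit are parsed back by the URLFactory.pm changes below, whose regexp captures a leading driver and also accepts an empty one (the ':///' shorthand used for same-database dataflow targets). A few example values (all illustrative), plus a hedged note on the raw DBI DSNs that JobFactory.pm and SqlCmd.pm assemble further down:

```perl
# Example eHive URLs under the two drivers (values illustrative):
my $mysql_url  = 'mysql://ensadmin:password@compara2:3306/my_user_long_mult';
my $sqlite_url = 'sqlite:///my_user_long_mult';   # no user/host/port; the dbname doubles as the file path
my $local_url  = ':////final_result';             # driverless shorthand: same database, naked table 'final_result'

# JobFactory.pm/SqlCmd.pm (below) interpolate a DSN as
#     DBI:$db_conn->{-driver}:$db_conn->{-dbname}:$db_conn->{-host}:$db_conn->{-port}
# which for sqlite yields something like 'DBI:sqlite:my_pipeline::'. DBI driver
# names are case-sensitive and stock DBD::SQLite registers itself as 'SQLite',
# expecting the canonical form below, so the lowercase spelling presumably
# relies on an alias or on normalization elsewhere -- worth verifying:
my $canonical_sqlite_dsn = 'DBI:SQLite:dbname=my_user_long_mult';
```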
@@ -72,6 +77,7 @@ sub default_options { 'pipeline_name' => 'long_mult', # name used by the beekeeper to prefix job names on the farm 'pipeline_db' => { # connection parameters + -driver => $Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf::hive_default_driver, -host => 'compara2', -port => 3306, -user => 'ensadmin', @@ -98,8 +104,8 @@ sub pipeline_create_commands { @{$self->SUPER::pipeline_create_commands}, # inheriting database and hive tables' creation # additional tables needed for long multiplication pipeline's operation: - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 1)." -e 'CREATE TABLE intermediate_result (a_multiplier char(40) NOT NULL, digit tinyint NOT NULL, result char(41) NOT NULL, PRIMARY KEY (a_multiplier, digit))'", - 'mysql '.$self->dbconn_2_mysql('pipeline_db', 1)." -e 'CREATE TABLE final_result (a_multiplier char(40) NOT NULL, b_multiplier char(40) NOT NULL, result char(80) NOT NULL, PRIMARY KEY (a_multiplier, b_multiplier))'", + $self->db_execute_command('pipeline_db', 'CREATE TABLE intermediate_result (a_multiplier char(40) NOT NULL, digit tinyint NOT NULL, result char(41) NOT NULL, PRIMARY KEY (a_multiplier, digit))'), + $self->db_execute_command('pipeline_db', 'CREATE TABLE final_result (a_multiplier char(40) NOT NULL, b_multiplier char(40) NOT NULL, result char(80) NOT NULL, PRIMARY KEY (a_multiplier, b_multiplier))'), ]; } @@ -143,7 +149,7 @@ sub pipeline_analyses { # (jobs for this analysis will be flown_into via branch-2 from 'start' jobs above) ], -flow_into => { - 'MAIN' => [ 'mysql:////intermediate_result' ], + 'MAIN' => [ ':////intermediate_result' ], }, }, @@ -155,7 +161,7 @@ sub pipeline_analyses { ], -wait_for => [ 'part_multiply' ], # we can only start adding when all partial products have been computed -flow_into => { - 'MAIN' => [ 'mysql:////final_result' ], + 'MAIN' => [ ':////final_result' ], }, }, ]; diff --git a/modules/Bio/EnsEMBL/Hive/Queen.pm b/modules/Bio/EnsEMBL/Hive/Queen.pm index f8b48707b..2fa5f3c3e 100755 --- a/modules/Bio/EnsEMBL/Hive/Queen.pm +++ b/modules/Bio/EnsEMBL/Hive/Queen.pm @@ -182,11 +182,11 @@ sub create_new_worker { my $sql = q{INSERT INTO worker (born, last_check_in, meadow_type, process_id, host, analysis_id) - VALUES (NOW(), NOW(), ?,?,?,?)}; + VALUES (CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?,?,?,?)}; my $sth = $self->prepare($sql); $sth->execute($meadow_type, $process_id, $exec_host, $analysisStats->analysis_id); - my $worker_id = $sth->{'mysql_insertid'}; + my $worker_id = ($self->dbc->driver eq 'sqlite') ? $self->dbc->db_handle->func('last_insert_rowid') : $sth->{'mysql_insertid'}; $sth->finish; my $worker = $self->fetch_by_dbID($worker_id); @@ -242,7 +242,7 @@ sub register_worker_death { $worker->analysis->stats->adaptor->decrease_running_workers($worker->analysis->stats->analysis_id); } - my $sql = "UPDATE worker SET died=now(), last_check_in=now()"; + my $sql = "UPDATE worker SET died=CURRENT_TIMESTAMP, last_check_in=CURRENT_TIMESTAMP"; $sql .= " ,status='DEAD'"; $sql .= " ,work_done='" . $worker->work_done . "'"; $sql .= " ,cause_of_death='$cod'"; @@ -332,7 +332,7 @@ sub worker_check_in { my ($self, $worker) = @_; return unless($worker); - my $sql = "UPDATE worker SET last_check_in=now()"; + my $sql = "UPDATE worker SET last_check_in=CURRENT_TIMESTAMP"; $sql .= " ,work_done='" . $worker->work_done . "'"; $sql .= " WHERE worker_id='" . $worker->dbID ."'"; @@ -391,8 +391,10 @@ sub fetch_overdue_workers { $overdue_secs = 3600 unless(defined($overdue_secs)); - my $constraint = "w.cause_of_death='' ". 
-                     "AND (UNIX_TIMESTAMP()-UNIX_TIMESTAMP(w.last_check_in))>$overdue_secs";
+    my $constraint = "w.cause_of_death='' AND ".
+                     ( ($self->dbc->driver eq 'sqlite')
+                        ? "(strftime('%s','now')-strftime('%s',w.last_check_in))>$overdue_secs"
+                        : "(UNIX_TIMESTAMP()-UNIX_TIMESTAMP(w.last_check_in))>$overdue_secs");
 
     return $self->_generic_fetch($constraint);
 }
@@ -803,10 +805,10 @@ sub monitor
     my $sql = qq{
         INSERT INTO monitor
         SELECT
-            now(),
+            CURRENT_TIMESTAMP,
             count(*),
-            sum(work_done/TIME_TO_SEC(TIMEDIFF(now(),born))),
-            sum(work_done/TIME_TO_SEC(TIMEDIFF(now(),born)))/count(*),
+            sum(work_done/TIME_TO_SEC(TIMEDIFF(CURRENT_TIMESTAMP,born))),
+            sum(work_done/TIME_TO_SEC(TIMEDIFF(CURRENT_TIMESTAMP,born)))/count(*),
             group_concat(DISTINCT logic_name)
         FROM worker left join analysis USING (analysis_id)
         WHERE cause_of_death = ""};
diff --git a/modules/Bio/EnsEMBL/Hive/RunnableDB/JobFactory.pm b/modules/Bio/EnsEMBL/Hive/RunnableDB/JobFactory.pm
index 157b5f79f..c4b8c6ecf 100644
--- a/modules/Bio/EnsEMBL/Hive/RunnableDB/JobFactory.pm
+++ b/modules/Bio/EnsEMBL/Hive/RunnableDB/JobFactory.pm
@@ -179,7 +179,9 @@ sub _make_list_from_query {
 
     my $dbc;
     if(my $db_conn = $self->param('db_conn')) {
-        $dbc = DBI->connect("DBI:mysql:$db_conn->{-dbname}:$db_conn->{-host}:$db_conn->{-port}", $db_conn->{-user}, $db_conn->{-pass}, { RaiseError => 1 });
+        $db_conn->{-driver} ||= 'mysql';
+
+        $dbc = DBI->connect("DBI:$db_conn->{-driver}:$db_conn->{-dbname}:$db_conn->{-host}:$db_conn->{-port}", $db_conn->{-user}, $db_conn->{-pass}, { RaiseError => 1 });
     } else {
         $dbc = $self->db->dbc;
     }
diff --git a/modules/Bio/EnsEMBL/Hive/RunnableDB/SqlCmd.pm b/modules/Bio/EnsEMBL/Hive/RunnableDB/SqlCmd.pm
index 42630dc33..5d4d3b5bf 100755
--- a/modules/Bio/EnsEMBL/Hive/RunnableDB/SqlCmd.pm
+++ b/modules/Bio/EnsEMBL/Hive/RunnableDB/SqlCmd.pm
@@ -86,8 +86,9 @@ sub fetch_input {
         # Use connection parameters to another database if supplied, otherwise use the current database as default:
         #
     if(my $db_conn = $self->param('db_conn')) {
+        $db_conn->{-driver} ||= 'mysql';
 
-        $self->param('dbh', DBI->connect("DBI:mysql:$db_conn->{-dbname}:$db_conn->{-host}:$db_conn->{-port}", $db_conn->{-user}, $db_conn->{-pass}, { RaiseError => 1 }) );
+        $self->param('dbh', DBI->connect("DBI:$db_conn->{-driver}:$db_conn->{-dbname}:$db_conn->{-host}:$db_conn->{-port}", $db_conn->{-user}, $db_conn->{-pass}, { RaiseError => 1 }) );
     } else {
         $self->param('dbh', $self->db->dbc->db_handle );
     }
@@ -119,7 +120,7 @@ sub run {
         $dbh->do( $sql );
 
         my $insert_id_name  = '_insert_id_'.$counter++;
-        my $insert_id_value = $dbh->{'mysql_insertid'};
+        my $insert_id_value = ($dbh->{Driver}{Name} eq 'SQLite') ?
$dbh->func('last_insert_rowid') : $dbh->{'mysql_insertid'}; $output_id{$insert_id_name} = $insert_id_value; $self->param($insert_id_name, $insert_id_value); # for templates } diff --git a/modules/Bio/EnsEMBL/Hive/URLFactory.pm b/modules/Bio/EnsEMBL/Hive/URLFactory.pm index 6f5df42e8..67a06c3ae 100755 --- a/modules/Bio/EnsEMBL/Hive/URLFactory.pm +++ b/modules/Bio/EnsEMBL/Hive/URLFactory.pm @@ -89,15 +89,15 @@ sub fetch { Bio::EnsEMBL::Hive::URLFactory->new(); # make sure global instance is created - if( my ($conn, $user, $pass, $host, $port, $dbname, $table_name, $tparam_name, $tparam_value, $conn_param_string) = - $url =~ m{^(mysql://(?:(\w+)(?:\:([^/\@]*))?\@)?(?:([\w\-\.]+)(?:\:(\d+))?)?/(\w*))(?:/(\w+)(?:\?(\w+)=(\w+))?)?((?:;(\w+)=(\w+))*)$} ) { + if( my ($conn, $driver, $user, $pass, $host, $port, $dbname, $table_name, $tparam_name, $tparam_value, $conn_param_string) = + $url =~ m{^((\w*)://(?:(\w+)(?:\:([^/\@]*))?\@)?(?:([\w\-\.]+)(?:\:(\d+))?)?/(\w*))(?:/(\w+)(?:\?(\w+)=(\w+))?)?((?:;(\w+)=(\w+))*)$} ) { my %conn_param = split(/[;=]/, 'type=hive;discon=0'.$conn_param_string ); -#warn "URLPARSER: conn='$conn', user='$user', pass='$pass', host='$host', port='$port', dbname='$dbname', table_name='$table_name', tparam_name='$tparam_name', tparam_value='$tparam_value'"; +#warn "URLPARSER: conn='$conn', driver='$driver', user='$user', pass='$pass', host='$host', port='$port', dbname='$dbname', table_name='$table_name', tparam_name='$tparam_name', tparam_value='$tparam_value'"; #warn "CONN_PARAMS: ".Dumper(\%conn_param); - my $dba = ($conn eq 'mysql:///') ? $default_dba : $class->create_cached_dba($user, $pass, $host, $port, $dbname, %conn_param); + my $dba = ($conn =~ m{^\w*:///$} ) ? $default_dba : $class->create_cached_dba($driver, $user, $pass, $host, $port, $dbname, %conn_param); if(not $table_name) { @@ -124,18 +124,19 @@ sub fetch { } sub create_cached_dba { - my $class = shift @_; - my $user = shift @_ || 'ensro'; - my $pass = shift @_ || ''; - my $host = shift @_ || ''; - my $port = shift @_ || 3306; - my $dbname = shift @_; - my %conn_param = @_; + my ($class, $driver, $user, $pass, $host, $port, $dbname, %conn_param) = @_; + + if($driver eq 'mysql') { + $user ||= 'ensro'; + $pass ||= ''; + $host ||= ''; + $port ||= 3306; + } my $type = $conn_param{'type'}; my $discon = $conn_param{'discon'}; - my $connectionKey = "$user:$pass\@$host:$port/$dbname;$type"; + my $connectionKey = "$driver://$user:$pass\@$host:$port/$dbname;$type"; my $dba = $_URLFactory_global_instance->{$connectionKey}; unless($dba) { @@ -150,13 +151,14 @@ sub create_cached_dba { eval "require $module"; $_URLFactory_global_instance->{$connectionKey} = $dba = $module->new ( - -disconnect_when_inactive => $discon, - -user => $user, - -pass => $pass, + -driver => $driver, -host => $host, -port => $port, + -user => $user, + -pass => $pass, -dbname => $dbname, -species => $dbname, + -disconnect_when_inactive => $discon, ); } return $dba; diff --git a/sql/tables.sqlite b/sql/tables.sqlite new file mode 100644 index 000000000..812203168 --- /dev/null +++ b/sql/tables.sqlite @@ -0,0 +1,408 @@ + +-- The first 3 tables are from the ensembl core schema: meta, analysis and analysis_description. +-- We create them with the 'IF NOT EXISTS' option in case they already exist in the DB. 
+
+-- ----------------------------------------------------------------------------------------------
+--
+-- Table structure for table 'meta' (FROM THE CORE SCHEMA)
+--
+
+CREATE TABLE IF NOT EXISTS meta (
+  meta_id          INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  species_id       INT UNSIGNED DEFAULT 1,
+  meta_key         VARCHAR(40) NOT NULL,
+  meta_value       TEXT NOT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS species_key_value_idx ON meta (species_id, meta_key, meta_value);
+CREATE INDEX IF NOT EXISTS species_value_idx ON meta (species_id, meta_value);
+
+
+-- ----------------------------------------------------------------------------------------------
+--
+-- Table structure for table 'analysis' (FROM THE CORE SCHEMA)
+--
+-- semantics:
+--
+-- analysis_id - internal id
+-- created
+--   - date to distinguish newer and older versions of the same analysis. Not
+--     well maintained so far.
+-- logic_name - string to identify the analysis. Used mainly inside pipeline.
+-- db, db_version, db_file
+--   - db should be a database name, db_version the version of that db,
+--     db_file the file system location of that database;
+--     probably wiser to generate from just db and configurations
+-- program, program_version, program_file
+--   - The binary used to create a feature. Similar semantics to above.
+-- module, module_version
+--   - Perl module names (RunnableDBs usually) executing this analysis.
+-- parameters - a parameter string which is processed by the Perl module
+-- gff_source, gff_feature
+--   - how to make a gff dump from features with this analysis
+
+
+CREATE TABLE IF NOT EXISTS analysis (
+  analysis_id      INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  created          datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
+  logic_name       VARCHAR(40) NOT NULL,
+  db               VARCHAR(120),
+  db_version       VARCHAR(40),
+  db_file          VARCHAR(255),
+  program          VARCHAR(255),
+  program_version  VARCHAR(40),
+  program_file     VARCHAR(255),
+  parameters       TEXT,
+  module           VARCHAR(255),
+  module_version   VARCHAR(40),
+  gff_source       VARCHAR(40),
+  gff_feature      VARCHAR(40)
+);
+CREATE UNIQUE INDEX IF NOT EXISTS logic_name_idx ON analysis (logic_name);
+
+
+-- ----------------------------------------------------------------------------------------------
+--
+-- Table structure for table 'analysis_description' (FROM THE CORE SCHEMA)
+--
+
+CREATE TABLE IF NOT EXISTS analysis_description (
+  analysis_id      INTEGER NOT NULL PRIMARY KEY,
+  description      TEXT,
+  display_label    VARCHAR(255),
+  displayable      BOOLEAN NOT NULL DEFAULT 1,
+  web_data         TEXT
+);
+CREATE UNIQUE INDEX IF NOT EXISTS analysis_idx ON analysis_description (analysis_id);
+
+
+
+-- ------------------- now, to the 'proper' Hive tables: ----------------------------------------
+
+
+
+-- ----------------------------------------------------------------------------------------------
+--
+-- Table structure for table 'worker'
+--
+-- overview:
+--   Table which tracks the workers of a hive as they exist out in the world.
+--   Workers are created by inserting into this table so that there is only ever
+--   one instance of a worker object in the world.  As workers live and do work,
+--   they update this table, and when they die they make one final update.
+--
+-- semantics:
+--
+
+CREATE TABLE worker (
+  worker_id        INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  analysis_id      INTEGER NOT NULL,
+  meadow_type      TEXT NOT NULL, /* enum('LSF', 'LOCAL') NOT NULL, */
+  host             varchar(40) DEFAULT NULL,
+  process_id       varchar(40) DEFAULT NULL,
+  work_done        int(11) DEFAULT '0' NOT NULL,
+  status           TEXT DEFAULT 'READY' NOT NULL, /* enum('READY','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DEAD') DEFAULT 'READY' NOT NULL, */
+  born             timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  last_check_in    datetime NOT NULL,
+  died             datetime DEFAULT NULL,
+  cause_of_death   TEXT DEFAULT '' NOT NULL /* enum('', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'FATALITY') DEFAULT '' NOT NULL */
+);
+CREATE INDEX IF NOT EXISTS analysis_id_status_idx ON worker (analysis_id, status);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'dataflow_rule'
+--
+-- overview:
+--   Extension of the simple_rule design, except that goal(to) is now in extended URL format e.g.
+--   mysql://ensadmin:<pass>@ecs2:3361/compara_hive_test?analysis.logic_name='blast_NCBI34'
+--   (full network address of an analysis).  The only requirement is that there are rows in
+--   the job, analysis, dataflow_rule, and worker tables so that the following join
+--   works on the same database
+--     WHERE analysis.analysis_id = dataflow_rule.from_analysis_id
+--     AND   analysis.analysis_id = job.analysis_id
+--     AND   analysis.analysis_id = worker.analysis_id
+--
+--   These are the rules used to create entries in the job table where the
+--   input_id (control data) is passed from one analysis to the next to define work.
+--
+--   The analysis table will be extended so that it can specify different read and write
+--   databases, with the default being the database the analysis is on.
+--
+-- semantics:
+--   dataflow_rule_id  - internal ID
+--   from_analysis_id  - foreign key to analysis table analysis_id
+--   to_analysis_url   - foreign key to net distributed analysis logic_name reference
+--   branch_code       - joined to job.branch_code to allow branching
+--   input_id_template - a template for generating a new input_id (not necessarily a hashref) in this dataflow; if undefined, the original input_id is kept
+
+CREATE TABLE dataflow_rule (
+  dataflow_rule_id   INTEGER PRIMARY KEY AUTOINCREMENT,
+  from_analysis_id   INTEGER NOT NULL,
+  to_analysis_url    varchar(255) default '' NOT NULL,
+  branch_code        int(10) default 1 NOT NULL,
+  input_id_template  TEXT DEFAULT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS from_to_branch_template_idx ON dataflow_rule (from_analysis_id, to_analysis_url, branch_code, input_id_template);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'analysis_ctrl_rule'
+--
+-- overview:
+--   These rules define a higher level of control.  These rules are used to turn
+--   whole analysis nodes on/off (READY/BLOCKED).
+--   If any of the condition_analyses are not 'DONE', the ctrled_analysis is set to BLOCKED.
+--   When all conditions become 'DONE', the ctrled_analysis is set to READY.
+--   The workers switch the analysis.status to 'WORKING' and 'DONE'.
+--   But at any moment, if a condition goes false, the analysis is reset to BLOCKED.
+--
+--   This process of watching conditions and flipping the ctrled_analysis state
+--   will be accomplished by another autonomous agent (CtrlWatcher.pm)
+--
+-- semantics:
+--   condition_analysis_url  - foreign key to net distributed analysis reference
+--   ctrled_analysis_id      - foreign key to analysis table analysis_id
+
+CREATE TABLE analysis_ctrl_rule (
+  condition_analysis_url   VARCHAR(255) DEFAULT '' NOT NULL,
+  ctrled_analysis_id       UNSIGNED INTEGER NOT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS condition_ctrled_idx ON analysis_ctrl_rule (condition_analysis_url, ctrled_analysis_id);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'job'
+--
+-- overview:
+--   The job is the heart of this system.  It is the kiosk or blackboard
+--   where workers find things to do and then post work for other workers to do.
+--   These jobs are created prior to work being done, are claimed by workers,
+--   are updated as the work is done, with a final update on completion.
+--
+-- semantics:
+--   job_id            - autoincrement id
+--   prev_job_id       - previous job which created this one (and passed input_id)
+--   analysis_id       - the analysis_id needed to accomplish this job.
+--   input_id          - input data passed into Analysis:RunnableDB to control the work
+--   worker_id         - link to worker table to define which worker claimed this job
+--   status            - state the job is in
+--   retry_count       - number of times the job had to be reset when a worker failed to run it
+--   completed         - datetime when job was completed
+--
+--   semaphore_count   - if this count is >0, the job is conditionally blocked (until this count drops to 0 or below).
+--                       Default=0 means "nothing is blocking me by default".
+--   semaphored_job_id - the job_id of job S that is waiting for this job to decrease S's semaphore_count.
+--                       Default=NULL means "I'm not blocking anything by default".
+
+CREATE TABLE job (
+  job_id             INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  prev_job_id        INTEGER DEFAULT NULL,  /* the job that created this one using a dataflow rule */
+  analysis_id        INTEGER NOT NULL,
+  input_id           char(255) NOT NULL,
+  worker_id          INTEGER DEFAULT NULL,
+  status             TEXT DEFAULT 'READY' NOT NULL, /* enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED','PASSED_ON') DEFAULT 'READY' NOT NULL, */
+  retry_count        int(10) DEFAULT 0 NOT NULL,
+  completed          datetime DEFAULT NULL,
+  runtime_msec       int(10) DEFAULT NULL,
+  query_count        int(10) DEFAULT NULL,
+
+  semaphore_count    INTEGER DEFAULT 0 NOT NULL,
+  semaphored_job_id  INTEGER DEFAULT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS input_id_analysis_id_idx ON job (input_id, analysis_id);
+CREATE INDEX IF NOT EXISTS analysis_status_sema_retry_idx ON job (analysis_id, status, semaphore_count, retry_count);
+CREATE INDEX IF NOT EXISTS job_worker_idx ON job (worker_id);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'job_message'
+--
+-- overview:
+--   In case a job throws a message (via die/throw), this message is registered in this table.
+--   It may or may not indicate that the job was unsuccessful, via the is_error flag.
+--
+-- semantics:
+--   job_id    - the id of the job that threw the message
+--   worker_id - the worker in charge of the job at the moment
+--   time      - when the message was thrown
+--   retry     - retry_count of the job when the message was thrown
+--   status    - of the job when the message was thrown
+--   msg       - string that contains the message
+--   is_error  - binary flag
+
+CREATE TABLE job_message (
+  job_id         INTEGER NOT NULL,
+  worker_id      INTEGER NOT NULL,
+  time           timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  retry          int(10) DEFAULT 0 NOT NULL,
+  status         TEXT DEFAULT 'UNKNOWN', /* enum('UNKNOWN', 'COMPILATION', 'GET_INPUT', 'RUN', 'WRITE_OUTPUT') DEFAULT 'UNKNOWN', */
+  msg            TEXT,
+  is_error       BOOLEAN,
+
+  PRIMARY KEY (job_id, worker_id, time)
+);
+-- NB: index names share one namespace in SQLite, so they are prefixed per table here
+-- (a bare 'worker_idx' would collide with the index of the same name on 'job' above
+-- and be silently skipped because of IF NOT EXISTS):
+CREATE INDEX IF NOT EXISTS job_message_worker_idx ON job_message (worker_id);
+CREATE INDEX IF NOT EXISTS job_message_job_idx ON job_message (job_id);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'job_file'
+--
+-- overview:
+--   Table which holds paths to files created by jobs,
+--   e.g. STDOUT, STDERR, temp directory,
+--   or output data files created by the RunnableDB.
+--   There can only be one entry of a certain type for a given job.
+--
+-- semantics:
+--   job_id    - foreign key
+--   worker_id - link to worker table to define which worker claimed this job
+--   retry     - copy of retry_count of job as it was run
+--   type      - type of file e.g. STDOUT, STDERR, TMPDIR, ...
+--   path      - path to file or directory
+
+CREATE TABLE job_file (
+  job_id         INTEGER NOT NULL,
+  worker_id      INTEGER NOT NULL,
+  retry          int(10) NOT NULL,
+  type           varchar(16) NOT NULL default '',
+  path           varchar(255) NOT NULL
+);
+CREATE UNIQUE INDEX IF NOT EXISTS job_worker_type_idx ON job_file (job_id, worker_id, type);
+CREATE INDEX IF NOT EXISTS job_file_worker_idx ON job_file (worker_id);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'analysis_data'
+--
+-- overview:
+--   Table which holds LONGTEXT data for use by the analysis system.
+--   This data is general purpose and it's up to each analysis to
+--   determine how to use it.
+--
+-- semantics:
+--   analysis_data_id - primary id
+--   data             - text blob which holds the data
+
+CREATE TABLE analysis_data (
+  analysis_data_id  INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  data              longtext
+);
+CREATE INDEX IF NOT EXISTS data_idx ON analysis_data (data);
+
+
+CREATE TABLE resource_description (
+  rc_id            INTEGER NOT NULL,
+  meadow_type      TEXT, /* enum('LSF', 'LOCAL') DEFAULT 'LSF' NOT NULL, */
+  parameters       varchar(255) DEFAULT '' NOT NULL,
+  description      varchar(255),
+  PRIMARY KEY(rc_id, meadow_type)
+);
+
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'analysis_stats'
+--
+-- overview:
+--   Parallel table to analysis which provides high level statistics on the
+--   state of an analysis and its jobs.  Used to provide a fast overview, and to
+--   provide final approval of 'DONE' which is used by the blocking rules to determine
+--   when to unblock other analyses.
+--   Also provides tuning parameters such as batch_size and hive_capacity.
+--
+-- semantics:
+--   analysis_id          - foreign key to analysis table
+--   status               - overview status of the jobs (cached state)
+--   failed_job_tolerance - % of tolerated failed jobs
+--   rc_id                - resource class id (analyses are grouped into disjoint classes)
+
+CREATE TABLE analysis_stats (
+  analysis_id              INTEGER NOT NULL,
+  status                   TEXT DEFAULT 'READY' NOT NULL, /* enum('BLOCKED', 'LOADING', 'SYNCHING', 'READY', 'WORKING', 'ALL_CLAIMED', 'DONE', 'FAILED') DEFAULT 'READY' NOT NULL, */
+  batch_size               int(10) default 1 NOT NULL,
+  avg_msec_per_job         int(10) default 0 NOT NULL,
+  avg_input_msec_per_job   int(10) default 0 NOT NULL,
+  avg_run_msec_per_job     int(10) default 0 NOT NULL,
+  avg_output_msec_per_job  int(10) default 0 NOT NULL,
+  hive_capacity            int(10) default 1 NOT NULL,
+  behaviour                TEXT DEFAULT 'STATIC' NOT NULL, /* enum('STATIC', 'DYNAMIC') DEFAULT 'STATIC' NOT NULL, */
+  input_capacity           int(10) NOT NULL DEFAULT 4,
+  output_capacity          int(10) NOT NULL DEFAULT 4,
+  total_job_count          int(10) NOT NULL DEFAULT 0,
+  unclaimed_job_count      int(10) NOT NULL DEFAULT 0,
+  done_job_count           int(10) NOT NULL DEFAULT 0,
+  max_retry_count          int(10) NOT NULL DEFAULT 3,
+  failed_job_count         int(10) NOT NULL DEFAULT 0,
+  failed_job_tolerance     int(10) NOT NULL DEFAULT 0,
+  num_running_workers      int(10) NOT NULL DEFAULT 0,
+  num_required_workers     int(10) NOT NULL DEFAULT 0,
+  last_update              datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  sync_lock                int(10) NOT NULL DEFAULT 0,
+  rc_id                    INTEGER NOT NULL DEFAULT 0,
+  can_be_empty             TINYINT UNSIGNED DEFAULT 0 NOT NULL
+);
+-- NB: named with a table prefix to avoid silently colliding with the 'analysis_idx'
+-- already defined on analysis_description above (SQLite index names are schema-wide):
+CREATE UNIQUE INDEX IF NOT EXISTS analysis_stats_analysis_idx ON analysis_stats (analysis_id);
+
+
+CREATE TABLE analysis_stats_monitor (
+  time                     datetime NOT NULL default '0000-00-00 00:00:00',
+  analysis_id              INTEGER NOT NULL,
+  status                   TEXT DEFAULT 'READY' NOT NULL, /* enum('BLOCKED', 'LOADING', 'SYNCHING', 'READY', 'WORKING', 'ALL_CLAIMED', 'DONE', 'FAILED') DEFAULT 'READY' NOT NULL, */
+  batch_size               int(10) default 1 NOT NULL,
+  avg_msec_per_job         int(10) default 0 NOT NULL,
+  avg_input_msec_per_job   int(10) default 0 NOT NULL,
+  avg_run_msec_per_job     int(10) default 0 NOT NULL,
+  avg_output_msec_per_job  int(10) default 0 NOT NULL,
+  hive_capacity            int(10) default 1 NOT NULL,
+  behaviour                TEXT DEFAULT 'STATIC' NOT NULL, /* enum('STATIC', 'DYNAMIC') DEFAULT 'STATIC' NOT NULL, */
+  input_capacity           int(10) default 4 NOT NULL,
+  output_capacity          int(10) default 4 NOT NULL,
+  total_job_count          int(10) NOT NULL DEFAULT 0,
+  unclaimed_job_count      int(10) NOT NULL DEFAULT 0,
+  done_job_count           int(10) NOT NULL DEFAULT 0,
+  max_retry_count          int(10) default 3 NOT NULL,
+  failed_job_count         int(10) NOT NULL DEFAULT 0,
+  failed_job_tolerance     int(10) default 0 NOT NULL,
+  num_running_workers      int(10) default 0 NOT NULL,
+  num_required_workers     int(10) NOT NULL DEFAULT 0,
+  last_update              datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  sync_lock                int(10) default 0 NOT NULL,
+  rc_id                    INTEGER DEFAULT 0 NOT NULL,
+  can_be_empty             TINYINT UNSIGNED DEFAULT 0 NOT NULL
+);
+
+-- ---------------------------------------------------------------------------------
+--
+-- Table structure for table 'monitor'
+--
+-- overview:
+--   This table stores information about hive performance.
+--
+-- semantics:
+--   time       - datetime
+--   workers    - number of running workers
+--   throughput - average number of completed jobs per sec. of the hive
+--                (this number is calculated using running workers only)
+--   per_worker - average number of completed jobs per sec.
+--                per worker (this number is calculated using running workers only)
+--   analysis   - analysis(es) running at that time
+
+CREATE TABLE monitor (
+  time           datetime NOT NULL default '0000-00-00 00:00:00',
+  workers        int(10) NOT NULL default '0',
+  throughput     float default NULL,
+  per_worker     float default NULL,
+  analysis       varchar(255) default NULL  /* not just one, but a list of logic_names */
+);
+
+
+-- Auto add schema version to database (should be overridden by Compara's table.sql)
+INSERT OR IGNORE INTO meta (species_id, meta_key, meta_value) VALUES (NULL, 'schema_version', '62');
+
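A quick way to exercise the new schema file end to end is to load sql/tables.sqlite into a scratch database and read back the schema_version row inserted at the very bottom. A minimal sketch, assuming the sqlite3 client and DBD::SQLite are installed (the scratch file name is illustrative):

```perl
#!/usr/bin/env perl
# Smoke test (not part of the patch): load the schema, then query it via DBI.
use strict;
use warnings;
use DBI;

my $db_file = 'test_hive.sqlite';   # illustrative scratch file
unlink $db_file;

system("sqlite3 $db_file < sql/tables.sqlite") == 0
    or die "could not load sql/tables.sqlite into $db_file";

my $dbh = DBI->connect("DBI:SQLite:dbname=$db_file", '', '', { RaiseError => 1 });

my ($version) = $dbh->selectrow_array(
    "SELECT meta_value FROM meta WHERE meta_key='schema_version'"
);
print "schema_version=$version\n";  # expect: 62

$dbh->disconnect;
```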