From a3a9331d29b06b19b80d06394f73e892d99e902d Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Mon, 19 Jul 2010 15:26:46 +0000
Subject: [PATCH] Addition of utility classes for use in the Ensembl API. The
 first are shortcuts for asserting object/ref types and the second is a class
 which removes boilerplate from working with databases.

---
 modules/Bio/EnsEMBL/Utils/Scalar.pm    | 137 +++++
 modules/Bio/EnsEMBL/Utils/SqlHelper.pm | 806 +++++++++++++++++++++++++
 modules/t/sqlHelper.t                  | 143 +++++
 modules/t/utilsScalar.t                |  43 ++
 4 files changed, 1129 insertions(+)
 create mode 100644 modules/Bio/EnsEMBL/Utils/Scalar.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/SqlHelper.pm
 create mode 100644 modules/t/sqlHelper.t
 create mode 100644 modules/t/utilsScalar.t

diff --git a/modules/Bio/EnsEMBL/Utils/Scalar.pm b/modules/Bio/EnsEMBL/Utils/Scalar.pm
new file mode 100644
index 0000000000..aba4315997
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/Scalar.pm
@@ -0,0 +1,137 @@
+package Bio::EnsEMBL::Utils::Scalar;
+
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=pod
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::Scalar
+
+=head1 SYNOPSIS
+
+	use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
+	
+	check_ref([], 'ARRAY'); # Will return true
+	check_ref({}, 'ARRAY'); # Will return false
+	check_ref($dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor'); #Returns true if $dba is a DBAdaptor
+	
+	assert_ref([], 'ARRAY'); #Returns true
+	assert_ref({}, 'ARRAY'); #throws an exception
+	assert_ref($dba, 'Bio::EnsEMBL::Gene'); #throws an exception if $dba is not a Gene  
+	
+=head1 DESCRIPTION
+
+A collection of subroutines aimed to helping Scalar based operations
+
+=head1 METHODS
+
+See subroutines.
+
+=head1 MAINTAINER
+
+$Author$
+
+=head1 VERSION
+
+$Revision$
+
+=cut
+
+use strict;
+use warnings;
+
+use base qw(Exporter);
+our @EXPORT_OK = qw(check_ref assert_ref);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+use Scalar::Util qw(blessed);
+
+=head2 check_ref()
+
+  Arg [1]     : The reference to check
+  Arg [2]     : The type we expect
+  Description : A subroutine which checks to see if the given object/ref is 
+                what you expect. If you give it a blessed reference then it 
+                will perform an isa() call on the object after the defined 
+                tests. If it is a plain reference then it will use ref().
+                
+                An undefined value will return a false.
+  Returntype  : Boolean indicating if the reference was the type we 
+                expect
+  Example     : my $ok = check_ref([], 'ARRAY');
+  Exceptions  : If the expected type was not set
+  Status      : Stable
+
+=cut
+
+sub check_ref {
+	my ($ref, $expected) = @_;
+	throw('No expected type given') if ! defined $expected;
+	if(defined $ref) {
+		if(blessed($ref)) {
+			return 1 if $ref->isa($expected);
+		}
+		else {
+			my $ref_ref_type = ref($ref);
+			return 1 if defined $ref_ref_type && $ref_ref_type eq $expected; 
+		}
+	}
+	return 0;
+}
+
+=head2 assert_ref()
+
+  Arg [1]     : The reference to check
+  Arg [2]     : The type we expect
+  Description : A subroutine which checks to see if the given object/ref is 
+                what you expect. This behaves in an identical manner as
+                C<check_ref()> does except this will raise exceptions when
+                the values do not match rather than returning a boolean
+                indicating the situation.
+                
+                Undefs cause exception circumstances.
+  Returntype  : None
+  Example     : assert_ref([], 'ARRAY');
+  Exceptions  : If the expected type was not set and if the given reference
+                was not assignable to the expected value
+  Status      : Stable
+
+=cut
+
+sub assert_ref {
+  my ($ref, $expected) = @_;
+  throw('No expected type given') if ! defined $expected;
+  my $class = ref($ref);
+  throw('Given reference was undef') unless defined $ref;
+  throw('Asking for the type of the reference produced no type; check your input is a reference') unless $class;
+  if(blessed($ref)) {
+    throw("Reference '${class}' is not an ISA of '${expected}'") if ! $ref->isa($expected);
+  }
+  else {    
+    throw("'${expected}' expected class was not equal to actual class '${class}'") if $expected ne $class;
+  }
+  return 1;
+}
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/SqlHelper.pm b/modules/Bio/EnsEMBL/Utils/SqlHelper.pm
new file mode 100644
index 0000000000..1dc33b80a8
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/SqlHelper.pm
@@ -0,0 +1,806 @@
+package Bio::EnsEMBL::Utils::SqlHelper;
+
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+  
+=cut
+
+=pod
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::SqlHelper
+
+=head1 VERSION
+
+$Revision$
+
+=head1 SYNOPSIS
+
+	use Bio::EnsEMBL::Utils::SqlHelper;
+
+	my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc);
+	my $arr_ref = $helper->execute(
+		-SQL => 'select name, age from tab where col =?',
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {name=>$row[0], age=>$row[1]};
+		},
+		-PARAMS => ['A']
+	);
+
+	use Data::Dumper;
+	print Dumper($arr_ref), "\n";
+	#Prints out [name=>'name', age=>1] maybe ....
+	
+	#For transactional work; only works if your MySQL table 
+	#engine/database supports transactional work (such as InnoDB)
+	$helper->transaction( -CALLBACK => sub {
+	  if($helper->execute_single_result(-SQL => 'select count(*) from tab')) {
+	    return $helper->execute_update('delete from tab);
+	  }
+	  else {
+	    return $helper->batch(-SQL => 'insert into tab (?,?)', -DATA => [
+	     [1,2],
+	     [1,3],
+	     [1,4]
+	    ]);
+	  }
+	});
+
+=head1 DESCRIPTION
+
+Easier database interaction
+
+=head1 COMMITTER
+
+$Author$
+
+=head1 METHODS
+
+See subrotuines.
+
+=cut
+
+use warnings;
+use strict;
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use English qw( -no_match_vars ); #Used for $PROCESS_ID
+use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection
+
+=pod
+
+=head2 new()
+
+  Arg [DB_CONNECTION] : DBConnection instance to use
+  Returntype          : Instance of helper
+  Exceptions          : If the object given as a DBConnection is not one
+  Status              : Stable
+
+Creates a new instance of this object.
+
+	my $dba = get_dba('mydb'); # New DBAdaptor from somewhere
+	my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dba->dbc());
+	$helper->execute_update(-SQL => 'update tab set flag=?', -PARAMS => [1]);
+
+=cut
+
+sub new {
+	my ( $class, @args ) = @_;
+	
+	my ($db_connection) = rearrange([qw(db_connection)], @args);
+	 
+	my $self = bless( {}, ref($class) || $class );
+	$self->db_connection($db_connection);
+	
+	return $self;
+}
+
+=pod
+
+  Arg [1]     : DBConnection instance to use
+  Description : Sets and retrieves the DBConnection 
+  Returntype  : DBConnection if set; otherwise undef
+  Exceptions  : If the object given as a DBConnection is not one or if an 
+                attempt is made to set the value more than once
+  Status      : Stable
+
+=cut
+
+sub db_connection {
+  my ($self, $db_connection) = @_;
+  if(defined $db_connection) {
+    if(exists $self->{db_connection}) {
+      throw('Cannot reset the DBConnection object; already defined ');
+    }
+    assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection');
+    $self->{db_connection} = $db_connection;
+    weaken $self->{db_connection};
+  }
+  return $self->{db_connection};
+}
+
+# --------- SQL Methods
+
+=pod
+
+=head2 execute() - Execute a SQL statement with a custom row handler
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point; 
+                        leave blank for a default mapping to a 2D array
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : 2D array containing the return of the callback
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+	my $arr_ref = $helper->execute(
+		-SQL      => 'select a,b,c from tab where col =?',
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {A=>$row[0], B=>$row[1], C=>$row[2]};
+		},
+		-PARAMS   => ['A']
+	);
+	
+	#Or with hashrefs
+	my $arr_ref = $helper->execute(
+		-SQL          => 'select a,b,c from tab where col =?',
+		-USE_HASHREFS => 1,
+		-PARAMS       => ['A'],
+		-CALLBACK     => sub {
+			my $row = shift @_;
+			return {A=>$row->{a}, B=>$row->{b}, C=>$row->{c}};
+		}
+	);
+
+Uses a callback defined by the C<sub> decalaration. Here we specify how the 
+calling code will deal with each row of a database's result set. The sub 
+can return any type of Object/hash/data structure you require.
+
+Should you not specify a callback then a basic one will be assigned to you
+which will return a 2D array structure e.g.
+
+  my $arr_ref = $helper->execute(
+		-SQL    => 'select a,b,c from tab where col =?',
+		-PARAMS => ['A']
+	);
+	
+This is equivalent to DBI's c<selectall_arrayref()> subroutine.
+
+As an extension to this method you can write a closure subroutine which
+takes in two parameters. The first is the array/hash reference & the second is
+the statement handle used to execute. 99% of the time you will not need
+it but there are occasions where you do need it. An example of usage would be:
+
+	my $conn = get_conn(); #From somwewhere
+	my $arr_ref = $conn->execute(
+		-SQL          => 'select a,b,c from tab where col =?',
+		-USE_HASHREFS => 1,
+		-PARAMS       => ['A'],
+		-CALLBACK     => sub {
+			my ($row, $sth) = @_;
+			#Then do something with sth
+			return {A=>$row->[0], B=>$row->[1], C=>$row->[2]};
+		}
+	);
+
+Any arguments to bind to the incoming statement. This can be a set of scalars
+or a 2D array if you need to specify any kind of types of sql objects i.e.
+
+	use DBI qw(:sql_types);
+
+	my $conn = get_conn();
+	my $arr_ref = $conn->execute(
+		-SQL => 'select a,b,c from tab where col =? and num_col=? and other=?',
+		-USE_HASHREFS => 1,
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {A=>$row[0], B=>$row[1], C=>$row[2]};
+		},
+		-PARAMS => ['1', SQL_VARCHAR], [2, SQL_INTEGER], 'hello'
+	);
+
+Here we import DBI's sql types into our package and then pass in multiple
+anonymous array references as parameters. Each param is tested in the input
+and if it is detected to be an ARRAY reference we dereference the array and
+run DBI's bind_param method. In fact you can see each part of the incoming
+paramaters array as the contents to call C<bind_param> with. The only difference
+is the package tracks the bind position for you.
+
+=cut
+
+sub execute {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
+	my $has_return = 1;
+	
+	#If no callback then we execute using a default one which returns a 2D array
+	if(!defined $callback) {
+    throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs;
+    $callback = $self->_mappers()->{array_ref};
+	}
+	
+	return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params );
+}
+
+=pod
+
+=head2 execute_simple()
+
+  Arg [SQL]           : SQL to execute
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : 1D array of data points
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+  my $classification = $helper->execute_simple(
+    -SQL    => 'select meta_val from meta where meta_key =? order by meta_id', 
+    -PARAMS => ['species.classification']
+  );
+
+Identical to C<execute> except you do not specify a sub-routine reference. 
+Using this code assumes you want an array of single scalar values as returned 
+by the given SQL statement.
+
+=cut
+
+sub execute_simple {
+  my ( $self, @args ) = @_;
+	my ($sql, $params) = rearrange([qw(sql params)], @args);
+	my $has_return = 1;
+	my $use_hashrefs = 0;
+	my $callback = $self->_mappers()->{first_element};
+	return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params);
+}
+
+=pod
+
+=head2 execute_no_return()
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point;
+                        we assume you are assigning into a data structure which
+                        has requirements other than simple translation into an
+                        array
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : None
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+Whilst all other execute methods will return something; this assumes that the
+given mapper subroutine will be performing the business of placing values
+somewhere or doing something with them.
+
+There is a huge temptation to nest queries using this method; do not! Execute
+the values into an array using one of the other methods then run your subqueries
+on them; or make a better first query. SQL is flexible; so use it.
+
+=cut
+
+sub execute_no_return {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
+	throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback;
+	my $has_return = 0;
+	$self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params );
+	return;
+}
+
+=pod
+
+=head2 execute_into_hash()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for mapping to a value in a hash
+                    keyed by the first element in your result set; 
+                    leave blank for a default mapping to a scalar value
+                    of the second element
+  Arg [PARAMS]    : The binding parameters to the SQL statement
+  Returntype      : A HashRef keyed by column 1 & value is the return of
+                    the callback
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+A variant of the execute methods but rather than returning a list of mapped
+results this will assume the first column of a returning map & the calling
+subroutine will map the remainder of your return as the hash's key. For example:
+
+	my $sql = 'select key, one, two from table where something =?';
+	my $mapper = sub {
+		my ($row) = @_;
+		#Ignore field 0 as that is being used for the key
+		my $obj = Some::Obj->new(one=>$row->[1], two=>$row->[2]);
+		return $obj;
+	};
+
+	my $hash = $helper->execute_into_hash(-SQL => $sql, -CALLBACK => $mapper, -PARAMS => ['val']);
+	
+	#Or for a more simple usage
+	my $sql = 'select biotype, count(gene_id) from gene group by biotype';
+	my $biotype_hash = $conn->execute_into_hash(-SQL => $sql);
+	print $biotype_hash->{protein_coding} || 0, "\n";
+
+The basic pattern assumes a scenario where you are mapping in a one key to
+one value. For more advanced mapping techniques you need to start using the
+non-consuming executes which allow you to process a result set without assuming
+that you want to map the rows into single objects.
+
+B<Remember that the row you are given is the full row & not a view of the
+reminaing fields.> Therefore indexing for the data you are concerned with
+begins at position 1.
+
+=cut
+
+sub execute_into_hash {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
+	my $hash = {};
+	
+	#If no callback then we execute using a default one which sets value to 2nd element
+	if(!defined $callback) {
+	 $callback = $self->_mappers()->{second_element};
+	}
+	
+	#Default mapper uses the 1st key + something else from the mapper
+	my $mapper = sub {
+		my $row   = shift @_;
+		my $value = $callback->($row);
+		$hash->{ $row->[0] } = $value;
+		return;
+	};
+	
+	$self->execute_no_return(
+	  -SQL => $sql, 
+	  -CALLBACK => $mapper,
+	  -PARAMS => $params
+	);
+	
+	return $hash;
+}
+
+=pod
+
+=head2 execute_single_result()
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point; 
+                        leave blank for a default scalar mapping
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype          : One data point
+  Exceptions          : If errors occur in the execution of the SQL
+  Status              : Stable
+
+  my $meta_count = $helper->execute_single_result(
+    -SQL => 'select count(*) from meta where species_id =?', 
+    -PARAMS => [1]
+  );
+
+Very similar to C<execute()> except it will raise an exception if we have more 
+or less than one row returned
+
+=cut
+
+sub execute_single_result {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange(
+	 [qw(sql callback use_hashrefs params)], @args);
+	
+	my $results = $self->execute_simple( 
+	  -SQL => $sql, 
+	  -CALLBACK => $callback, 
+	  -USE_HASHREFS => $use_hashrefs, 
+	  -PARAMS => $params
+	);
+	
+	my $result_count = scalar(@{$results});
+	if($result_count != 1) {
+	  $params = [] if ! $params;
+	  my $type = ($result_count == 0) ? 'No' : 'Too many';
+		my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params [";
+		$msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} );
+		$msg .= ']';
+		throw($msg);
+	}
+	return $results->[0];
+}
+
+=pod
+
+=head2 transaction()
+
+  Arg [CALLBACK]  : The callback used for transaction isolation; once 
+                    the subroutine exists the code will decide on rollback
+                    or commit
+  Returntype      : Return of the callback
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+  my $val = $helper->transaction(-CALLBACK => sub {
+    my ($dbc) = @_;
+    #Do something
+    return 1;
+  });
+  
+  #Or because of the arguments method we use
+  my $val = $helper->transaction(sub {
+    my ($dbc) = @_;
+    #Do something
+    return 1;
+  });
+
+Creates a transactional block which will ensure that the connection is committed
+when your submmited subroutine has finished or will rollback in the event of
+an error occuring in your block.
+
+The code will always force AutoCommit off but will restore it to its 
+previous setting. If your DBI/DBD driver does not support manual commits
+then this code will break. The code will turn off the 
+C<disconnect_when_idle()> method to allow transactions to work as expected.
+
+Creating a transaction within a transaction results in the commit rollback 
+statements occuring in the top level transaction. That way any block of 
+code which is meant to to be transaction can be wrapped in this block (
+assuming the same instance of SQLHelper is passed around & used).
+
+=cut
+
+sub transaction {
+  my ($self, @args) = @_;
+  
+  my ($callback) = rearrange([qw(callback)], @args);
+  
+  throw('Callback was not a CodeRef. Got a reference of type ['.ref($callback).']') 
+    unless check_ref($callback, 'CODE');
+ 
+  my $dbc = $self->db_connection();
+  my $original_dwi;
+  my $ac;
+  
+  #If we were already in a transaction then we do not do any management of the
+  #session & wait for the parent transaction(s) to finish
+  my $perform_transaction = $self->_perform_transaction_code();
+  if($perform_transaction) {
+    $original_dwi = $dbc->disconnect_when_inactive();
+    $ac = $dbc->db_handle()->{'AutoCommit'};
+    $dbc->db_handle()->{'AutoCommit'} = 0;
+    $self->_enable_transaction();
+  }
+  
+  my $error;
+  my $result;
+  
+  eval {
+    $result = $callback->($dbc);
+    $dbc->db_handle()->commit() if $perform_transaction;
+  };
+  $error = $@;
+  
+  if($perform_transaction) {
+    if($error) {
+      eval { $dbc->db_handle()->rollback(); };
+    }
+    $dbc->db_handle()->{'AutoCommit'} = $ac;
+    $dbc->disconnect_when_inactive($original_dwi);
+    $self->_disable_transaction();
+  }
+  
+  throw("Transaction aborted because of error: ${error}") if $error;
+  
+  return $result;
+}
+
+=pod
+
+=head2 execute_update()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for calling methods on the 
+                    DBI statement handle or DBConnection object after an 
+                    update command
+  Arg [PARAMS]    : The binding parameters to the SQL statement
+  Returntype      : Number of rows affected
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+Used for performing updates but conforms to the normal execute statement
+subroutines.
+
+	use DBI qw(:sql_types);
+	$helper->execute_update(
+	 -SQL => 'update tab set name = ? where id =?', 
+	 -PARAMS => ['andy', [1, SQL_INTEGER]]
+	);
+
+If you need to do something a bit more advanced with your DML then you can
+give the method a closure and this will be called after the execute has been
+issued i.e.
+
+	my $obj;
+	$helper->execute_update(
+	 -SQL => 'insert into tab (name) values(?)', 
+	 -CALLBACK => sub {
+	   my ($sth, $dbc) = @_;
+	   $obj->{id} = $dbh->{mysql_insertid);
+   }, 
+   -PARAMS => [$obj->name()]
+  );
+
+This lets us access the statement handle & database handle to access other
+properties such as the last identifier inserted.
+
+=cut
+
+sub execute_update {
+  my ($self, @args) = @_;
+  my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
+  my $rv = 0;
+  my $sth;
+  eval {
+    $sth = $self->db_connection()->prepare($sql);
+    $self->_bind_params($sth, $params);
+    $rv = $sth->execute();
+    $callback->($sth, $self->db_connection()) if $callback;
+  };
+  my $error = $@;
+  $self->_finish_sth($sth);
+  if($error) {
+    my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params};
+    throw("Cannot apply sql '${sql}' with params '${params}': ${error}");
+  }
+  return $rv;
+}
+
+=pod
+
+=head1 batch()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for binding the data to execute in
+                    the batch statement; optional (if you specify DATA)
+  Arg [DATA]      : The data to use in the batch statement; optional (if you
+                    specify CALLBACK)
+  Returntype      : Number of rows affected
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+  my $alotofdata = getitfromsomewhere();
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -CALLBACk => sub {
+    my ($sth, $dbc) = @_;
+    foreach my $data (@alotofdata) {
+      $sth->execute(@{$data});
+    }
+  });
+  
+  #Or for a 2D array data driven approach
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -DATA => $alotofdata);
+
+Takes in a sql statement & a code reference. Your SQL is converted into a 
+prepared statement & then given as the first parameter to the closure. The
+second parameter is the DBH which created the statement. This is intended
+to let you do mass insertion into a database without the need to
+re-preparing the same statement.
+
+This can be combined with the transaction() code to provide a construct
+which does batch insertion & is transactionally aware.
+
+We can also use data based batch insertions i.e.
+
+  #Needs to be like:
+  #   [ [1,2], [3,4] ]
+  #Or if using the DBI types: 
+  #   [ [ [1, SQL_INTEGER], [2, SQL_INTEGER] ], [ [3, SQL_INTEGER], [4, SQL_INTEGER] ] ]
+  my $alotofdata = getitfromsomewhere(); 
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', 
+    -DATA => $alotofdata);
+  
+This does exactly what the previous example.
+
+All batch statements will return the value the callback computes. If you are 
+using the previous example with a data array then the code will return the
+number affected rows by the query. 
+
+=cut
+
+sub batch {
+  my ($self, @args) = @_;
+  my ($sql, $callback, $data) = rearrange([qw(sql callback data)], @args);
+  
+  if(! defined $callback && ! defined $data) {
+    throw('You need to define a callback for insertion work or the 2D data array');
+  }
+  
+  my $result;
+  if(defined $callback) {
+    $result = $self->_callback_batch($sql, $callback);
+  }
+  else {
+    $result = $self->_data_batch($sql, $data);
+  }
+  return $result if defined $result;
+  return;
+}
+
+#------- Internal methods
+
+sub _mappers {
+  my ($self) = @_;
+  if(! exists $self->{_mappers}) {
+    $self->{_mappers} = {
+      first_element => sub {
+        my ($row) = @_;
+        return $row->[0];
+      },
+      second_element => sub {
+        my ($row) = @_;
+        return $row->[1];
+      },
+      array_ref => sub {
+        my $row = shift @_;
+	     return [@{$row}];
+      }
+    };
+  }
+  return $self->{_mappers};
+}
+
+sub _perform_transaction_code {
+  my ($self) = @_;
+  return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1;
+}
+
+sub _enable_transaction {
+  my ($self) = @_;
+  $self->{_transaction_active}->{$PROCESS_ID} = 1;
+  return;
+}
+
+sub _disable_transaction {
+  my ($self) = @_;
+  delete $self->{_transaction_active}->{$PROCESS_ID};
+  return;
+}
+
+sub _bind_params {
+	my ( $self, $sth, $params ) = @_;
+	
+	return if ! defined $params; #Return quickly if we had no data
+	
+	if(! check_ref($params, 'ARRAY')) {
+	  throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef});
+	}
+	
+	my $count = 1;
+	foreach my $param (@{$params}) {
+		if ( check_ref($param, 'ARRAY') ) {
+			$sth->bind_param( $count, @{$param} );
+		}
+		else {
+			$sth->bind_param( $count, $param );
+		}
+		$count++;
+	}
+	return;
+}
+
+sub _execute {
+	my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params ) = @_;
+	
+	throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
+	
+	$params = [] unless $params;
+	
+	my $conn = $self->db_connection;
+	my @results;
+
+	my $error;
+	my $sth_close_error;
+	my $sth;
+
+	eval {
+		$sth = $conn->prepare($sql);
+		throw("Cannot continue as prepare() did not return a handle") unless $sth;
+		$self->_bind_params( $sth, $params );
+		$sth->execute();
+		if($use_hashrefs) {
+		  while( my $row = $sth->fetchrow_hashref() ) {
+		    push(@results, $callback->($row, $sth));
+		  }
+		}
+		else {
+      while ( my $row = $sth->fetchrow_arrayref() ) {
+  			push(@results, $callback->($row, $sth));
+  		}
+		}
+	};
+	
+	$error = $@;
+	$self->_finish_sth($sth);
+	if($error) {
+  	throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error;
+	}
+	return \@results if $has_return;
+	return;
+}
+
+sub _finish_sth {
+  my ($self, $sth) = @_;
+  eval { $sth->finish() if defined $sth; };
+  warning('Cannot finish() the statement handle: $@') if $@;
+  return;
+}
+
+sub _callback_batch {
+  my ($self, $sql, $callback) = @_;
+  my $error;
+  my $sth;
+  my $closure_return;
+  eval {
+    $sth = $self->db_connection()->prepare($sql); 
+    $closure_return = $callback->($sth, $self->db_connection());
+  };
+  $error = $@;
+  $self->_finish_sth($sth);
+	throw("Problem detected during batch work: $error") if $error;
+  
+  return $closure_return if defined $closure_return;
+  return;
+}
+
+sub _data_batch {
+  my ($self, $sql, $data) = @_;
+  
+  #Input checks
+  assert_ref($data, 'ARRAY');
+  my $data_length = scalar(@{$data});
+  return 0 unless $data_length > 0;
+  my $first_row = $data->[0];
+  throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY');
+
+  my $callback = sub {
+    my ($sth, $dbc) = @_;
+    my $total_affected = 0;
+    #Iterate over each data point
+    for(my $data_index = 0; $data_index < $data_length; $data_index++) {
+      my $row = $data->[$data_index];
+      $self->_bind_params($sth, $row);
+      my $affected = eval {$sth->execute()};
+      if($@) {
+        throw("Problem working with $sql with params @{$row}: $@");
+      }
+      my $num_affected = ($affected) ? $affected :  0; #Get around DBI's 0E0
+      $total_affected += $num_affected;
+    }
+    return $total_affected || 0;
+  };
+  
+  return $self->_callback_batch($sql, $callback)
+}
+
+1;
\ No newline at end of file
diff --git a/modules/t/sqlHelper.t b/modules/t/sqlHelper.t
new file mode 100644
index 0000000000..4afd4962ae
--- /dev/null
+++ b/modules/t/sqlHelper.t
@@ -0,0 +1,143 @@
+#A set tests used to prod the SqlHelper class
+
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Exception;
+use Scalar::Util qw(isweak);
+
+use Bio::EnsEMBL::Test::MultiTestDB;
+use Bio::EnsEMBL::Test::TestUtils;
+use Bio::EnsEMBL::Utils::SqlHelper;
+
+my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
+my $dba = $multi->get_DBAdaptor( 'core' );
+ok( $dba, 'Test database instatiated' );
+
+#Now start testing the Helper
+dies_ok { Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba) } 
+  'Expect to die when we do not give SqlHelper a DBConncetion'; #was given a DBAdaptor
+ok ( 
+  isweak(Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc())->{db_connection}),
+  'Checking DBConnection reference is weak when we ask for it' 
+);
+
+my $helper = Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc());
+ok ( $helper, 'SqlHelper instance was created' );
+
+
+my $meta_key = 'species.common_name';
+diag("Meta key queries working with ${meta_key}. If the tests fail then check for it in the DB dumps");
+
+is( 
+  $helper->execute_single_result(-SQL => qq{select count(*) from meta where meta_key = '$meta_key'}),
+  1,
+  'Checking count of meta key is right with no params'
+);
+
+is( 
+  $helper->execute_single_result(-SQL => 'select count(*) from meta where meta_key =?', -PARAMS => [$meta_key]),
+  1,
+  'Checking count of meta key is right with params'
+);
+
+is_deeply( 
+  $helper->execute(-SQL => 'select count(*), 3 from meta where meta_key =?', -PARAMS => [$meta_key])->[0],
+  [1,3],
+  'Checking 2D mapping of meta key count works'
+);
+
+my $meta_count_hash = $helper->execute_into_hash(
+  -SQL => 'select meta_key, count(*) from meta group by meta_key'
+);
+
+is($meta_count_hash->{$meta_key}, 1, 'Checking hash comes back correctly');
+
+my $meta_table_count = $helper->execute_single_result(-SQL => 'select count(*) from meta');
+my $meta_memoize = $helper->execute(-SQL => 'select * from meta');
+
+is(scalar(@{$meta_memoize}), $meta_table_count, 'All meta items are returned');
+
+$dba->dbc()->do('alter table meta engine=InnoDB');
+
+ok($helper->_perform_transaction_code(), 'This level should do all transaction work');
+
+my $get_value = sub {
+  return $helper->execute_single_result(-SQL => 'select meta_value from meta where meta_key =?', -PARAMS => [$meta_key]);
+};
+
+{
+  #transaction isolation checks
+  throws_ok {
+    $helper->transaction(-CALLBACK => sub {
+      my $sql = 'insert into meta (species_id, meta_key, meta_value) values (?,?,?)';
+      $helper->execute_update(
+        -SQL => $sql,
+        -PARAMS => [2, 'm', '1']
+      );
+      $helper->execute_update(
+        -SQL => $sql,
+        -PARAMS => [2, 'm', '2']
+      );
+      
+      my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]);
+      is($count, 2, 'Count should be 2');
+      die 'Dead now';
+    });
+  }qr/Dead now/, 'Died as expected';
+  my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]);
+  is($count, 0, 'Count should be 0 as we reset the transaction');
+}
+
+#Testing multiple level isolation (or more that the framework ignores them)
+{
+  my $new_meta_value = 'test';
+  throws_ok {
+  $helper->transaction( -CALLBACK => sub {
+    $helper->execute_update(-SQL => 'update meta set meta_value =? where meta_key =?', -PARAMS => [$new_meta_value, $meta_key]);
+    eval {
+      $helper->transaction(-CALLBACK => sub {
+        ok(!$helper->_perform_transaction_code(), 'This level should not be doing any transaction work');
+        die 'This will not cause the transaction to be aborted';
+      });
+    };
+    is($get_value->(), $new_meta_value, 'The die from the prior transaction should not have triggered a rollback');
+    die('Dead now');
+  });
+  } qr/Dead now/, 'Expected die found';
+  
+  isnt($get_value->(), $new_meta_value, 'Meta value is reset as transaction was aborted');
+  
+  $helper->transaction( -CALLBACK => sub {
+    $helper->execute_update(-SQL => 'delete from meta');
+  });
+  
+  $helper->transaction( -CALLBACK => sub {
+    $helper->batch(-SQL => 'insert into meta values (?,?,?,?)', -DATA => $meta_memoize);
+  });
+  
+  my $new_count_hash = $helper->execute_into_hash(
+    -SQL => 'select meta_key, count(*) from meta group by meta_key'
+  );
+  is_deeply($new_count_hash, $meta_count_hash, 'Counts of meta keys should be the same');
+}
+
+#Doing hashref checks
+{
+  my $sql = 'select meta_key, meta_value from meta where meta_key =?';
+  my $callback = sub {
+    my ($row) = @_;
+    return { name => $row->{meta_value} };
+  };
+  my $array_of_hashes = $helper->execute(
+    -SQL => $sql,
+    -CALLBACK => $callback,
+    -USE_HASHREFS => 1,
+    -PARAMS => ['species.common_name']
+  );
+  is_deeply($array_of_hashes, [ { name => 'Human' } ], 'HashRefs in a callback works');
+}
+
+$dba->dbc()->do('alter table meta engine=MyISAM');
+done_testing();
diff --git a/modules/t/utilsScalar.t b/modules/t/utilsScalar.t
new file mode 100644
index 0000000000..240af5d84a
--- /dev/null
+++ b/modules/t/utilsScalar.t
@@ -0,0 +1,43 @@
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Exception;
+
+use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
+use Bio::EnsEMBL::IdMapping::TinyGene;
+
+my $gene = Bio::EnsEMBL::IdMapping::TinyGene->new_fast([]);
+
+dies_ok { assert_ref(undef, 'ARRAY') } 'Undef value results in death';
+dies_ok { assert_ref([], undef) } 'Undef assertion results in death';
+throws_ok { assert_ref('string', 'ARRAY') } qr/produced no type/, 'Passing in a Scalar means death';
+dies_ok { assert_ref(\'', 'ARRAY') } 'Ref of a Scalar is not an ARRAY so death';
+dies_ok { assert_ref($gene, 'CODE') } 'TinyGene object is not a CODE so death';
+dies_ok { assert_ref($gene, 'Bio::EnsEMBL::Feature') } 'TinyGene object is not a Bio::EnsEMBL::Feature so death';
+dies_ok { assert_ref($gene, 'HASH') }  'TinyGene is blessed so we expect false even though it is a HASH';
+
+lives_ok { assert_ref(\'', 'SCALAR') } 'Ref of a Scalar should be a SCALAR';
+lives_ok { assert_ref([], 'ARRAY') } 'Ref of an array should be a ARRAY';
+lives_ok { assert_ref({}, 'HASH') } 'Ref of a hash should be a HASH';
+lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature') } 'Ref of a gene should be a TinyFeature';
+lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene') } 'Ref of a gene should be a TinyGene';
+
+#Now for check_ref
+
+dies_ok { check_ref([], undef) } 'Undef for assertion in check_ref results in death';
+
+ok(! check_ref(undef, 'ARRAY'), 'Undef value returns false');
+ok(! check_ref('string', 'ARRAY'), 'Passing in a Scalar means returns false');
+ok(! check_ref(\'', 'ARRAY'),  'Ref of a Scalar is not an ARRAY so returns false');
+ok(! check_ref($gene, 'CODE'),  'TinyGene object is not a CODE so returns false');
+ok(! check_ref($gene, 'Bio::EnsEMBL::Feature'),  'TinyGene object is not a Bio::EnsEMBL::Feature so returns false');
+ok(! check_ref($gene, 'HASH'),  'TinyGene is blessed so we expect false even though it is a HASH');
+
+ok ( check_ref(\'', 'SCALAR'), 'Ref of a Scalar should be a SCALAR');
+ok ( check_ref([], 'ARRAY'), 'Ref of an array should be a ARRAY');
+ok ( check_ref({}, 'HASH'), 'Ref of a hash should be a HASH');
+ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature'), 'Ref of a gene should be a TinyFeature');
+ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene'), 'Ref of a gene should be a TinyGene');
+
+done_testing();
-- 
GitLab