Addition of utility classes for use in the Ensembl API. The first are...

Addition of utility classes for use in the Ensembl API. The first are shortcuts for asserting object/ref types and the second is a class which removes boilerplate from working with databases.

Addition of utility classes for use in the Ensembl API. The first are...
Addition of utility classes for use in the Ensembl API. The first are shortcuts for asserting object/ref types and the second is a class which removes boilerplate from working with databases.
a3a9331d · Andy Yates · 8521c3be · a3a9331d · a3a9331d · a3a9331d
Commit a3a9331d authored 14 years ago by Andy Yates
--- a/modules/Bio/EnsEMBL/Utils/Scalar.pm
+++ b/modules/Bio/EnsEMBL/Utils/Scalar.pm
+package Bio::EnsEMBL::Utils::Scalar;
+
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=pod
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::Scalar
+
+=head1 SYNOPSIS
+
+	use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
+	
+	check_ref([], 'ARRAY'); # Will return true
+	check_ref({}, 'ARRAY'); # Will return false
+	check_ref($dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor'); #Returns true if $dba is a DBAdaptor
+	
+	assert_ref([], 'ARRAY'); #Returns true
+	assert_ref({}, 'ARRAY'); #throws an exception
+	assert_ref($dba, 'Bio::EnsEMBL::Gene'); #throws an exception if $dba is not a Gene  
+	
+=head1 DESCRIPTION
+
+A collection of subroutines aimed to helping Scalar based operations
+
+=head1 METHODS
+
+See subroutines.
+
+=head1 MAINTAINER
+
+$Author$
+
+=head1 VERSION
+
+$Revision$
+
+=cut
+
+use strict;
+use warnings;
+
+use base qw(Exporter);
+our @EXPORT_OK = qw(check_ref assert_ref);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+use Scalar::Util qw(blessed);
+
+=head2 check_ref()
+
+  Arg [1]     : The reference to check
+  Arg [2]     : The type we expect
+  Description : A subroutine which checks to see if the given object/ref is 
+                what you expect. If you give it a blessed reference then it 
+                will perform an isa() call on the object after the defined 
+                tests. If it is a plain reference then it will use ref().
+                
+                An undefined value will return a false.
+  Returntype  : Boolean indicating if the reference was the type we 
+                expect
+  Example     : my $ok = check_ref([], 'ARRAY');
+  Exceptions  : If the expected type was not set
+  Status      : Stable
+
+=cut
+
+sub check_ref {
+	my ($ref, $expected) = @_;
+	throw('No expected type given') if ! defined $expected;
+	if(defined $ref) {
+		if(blessed($ref)) {
+			return 1 if $ref->isa($expected);
+		}
+		else {
+			my $ref_ref_type = ref($ref);
+			return 1 if defined $ref_ref_type && $ref_ref_type eq $expected; 
+		}
+	}
+	return 0;
+}
+
+=head2 assert_ref()
+
+  Arg [1]     : The reference to check
+  Arg [2]     : The type we expect
+  Description : A subroutine which checks to see if the given object/ref is 
+                what you expect. This behaves in an identical manner as
+                C<check_ref()> does except this will raise exceptions when
+                the values do not match rather than returning a boolean
+                indicating the situation.
+                
+                Undefs cause exception circumstances.
+  Returntype  : None
+  Example     : assert_ref([], 'ARRAY');
+  Exceptions  : If the expected type was not set and if the given reference
+                was not assignable to the expected value
+  Status      : Stable
+
+=cut
+
+sub assert_ref {
+  my ($ref, $expected) = @_;
+  throw('No expected type given') if ! defined $expected;
+  my $class = ref($ref);
+  throw('Given reference was undef') unless defined $ref;
+  throw('Asking for the type of the reference produced no type; check your input is a reference') unless $class;
+  if(blessed($ref)) {
+    throw("Reference '${class}' is not an ISA of '${expected}'") if ! $ref->isa($expected);
+  }
+  else {    
+    throw("'${expected}' expected class was not equal to actual class '${class}'") if $expected ne $class;
+  }
+  return 1;
+}
+
+1;
--- a/modules/Bio/EnsEMBL/Utils/SqlHelper.pm
+++ b/modules/Bio/EnsEMBL/Utils/SqlHelper.pm
+package Bio::EnsEMBL::Utils::SqlHelper;
+
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+  
+=cut
+
+=pod
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::SqlHelper
+
+=head1 VERSION
+
+$Revision$
+
+=head1 SYNOPSIS
+
+	use Bio::EnsEMBL::Utils::SqlHelper;
+
+	my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc);
+	my $arr_ref = $helper->execute(
+		-SQL => 'select name, age from tab where col =?',
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {name=>$row[0], age=>$row[1]};
+		},
+		-PARAMS => ['A']
+	);
+
+	use Data::Dumper;
+	print Dumper($arr_ref), "\n";
+	#Prints out [name=>'name', age=>1] maybe ....
+	
+	#For transactional work; only works if your MySQL table 
+	#engine/database supports transactional work (such as InnoDB)
+	$helper->transaction( -CALLBACK => sub {
+	  if($helper->execute_single_result(-SQL => 'select count(*) from tab')) {
+	    return $helper->execute_update('delete from tab);
+	  }
+	  else {
+	    return $helper->batch(-SQL => 'insert into tab (?,?)', -DATA => [
+	     [1,2],
+	     [1,3],
+	     [1,4]
+	    ]);
+	  }
+	});
+
+=head1 DESCRIPTION
+
+Easier database interaction
+
+=head1 COMMITTER
+
+$Author$
+
+=head1 METHODS
+
+See subrotuines.
+
+=cut
+
+use warnings;
+use strict;
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use English qw( -no_match_vars ); #Used for $PROCESS_ID
+use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection
+
+=pod
+
+=head2 new()
+
+  Arg [DB_CONNECTION] : DBConnection instance to use
+  Returntype          : Instance of helper
+  Exceptions          : If the object given as a DBConnection is not one
+  Status              : Stable
+
+Creates a new instance of this object.
+
+	my $dba = get_dba('mydb'); # New DBAdaptor from somewhere
+	my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dba->dbc());
+	$helper->execute_update(-SQL => 'update tab set flag=?', -PARAMS => [1]);
+
+=cut
+
+sub new {
+	my ( $class, @args ) = @_;
+	
+	my ($db_connection) = rearrange([qw(db_connection)], @args);
+	 
+	my $self = bless( {}, ref($class) || $class );
+	$self->db_connection($db_connection);
+	
+	return $self;
+}
+
+=pod
+
+  Arg [1]     : DBConnection instance to use
+  Description : Sets and retrieves the DBConnection 
+  Returntype  : DBConnection if set; otherwise undef
+  Exceptions  : If the object given as a DBConnection is not one or if an 
+                attempt is made to set the value more than once
+  Status      : Stable
+
+=cut
+
+sub db_connection {
+  my ($self, $db_connection) = @_;
+  if(defined $db_connection) {
+    if(exists $self->{db_connection}) {
+      throw('Cannot reset the DBConnection object; already defined ');
+    }
+    assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection');
+    $self->{db_connection} = $db_connection;
+    weaken $self->{db_connection};
+  }
+  return $self->{db_connection};
+}
+
+# --------- SQL Methods
+
+=pod
+
+=head2 execute() - Execute a SQL statement with a custom row handler
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point; 
+                        leave blank for a default mapping to a 2D array
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : 2D array containing the return of the callback
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+	my $arr_ref = $helper->execute(
+		-SQL      => 'select a,b,c from tab where col =?',
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {A=>$row[0], B=>$row[1], C=>$row[2]};
+		},
+		-PARAMS   => ['A']
+	);
+	
+	#Or with hashrefs
+	my $arr_ref = $helper->execute(
+		-SQL          => 'select a,b,c from tab where col =?',
+		-USE_HASHREFS => 1,
+		-PARAMS       => ['A'],
+		-CALLBACK     => sub {
+			my $row = shift @_;
+			return {A=>$row->{a}, B=>$row->{b}, C=>$row->{c}};
+		}
+	);
+
+Uses a callback defined by the C<sub> decalaration. Here we specify how the 
+calling code will deal with each row of a database's result set. The sub 
+can return any type of Object/hash/data structure you require.
+
+Should you not specify a callback then a basic one will be assigned to you
+which will return a 2D array structure e.g.
+
+  my $arr_ref = $helper->execute(
+		-SQL    => 'select a,b,c from tab where col =?',
+		-PARAMS => ['A']
+	);
+	
+This is equivalent to DBI's c<selectall_arrayref()> subroutine.
+
+As an extension to this method you can write a closure subroutine which
+takes in two parameters. The first is the array/hash reference & the second is
+the statement handle used to execute. 99% of the time you will not need
+it but there are occasions where you do need it. An example of usage would be:
+
+	my $conn = get_conn(); #From somwewhere
+	my $arr_ref = $conn->execute(
+		-SQL          => 'select a,b,c from tab where col =?',
+		-USE_HASHREFS => 1,
+		-PARAMS       => ['A'],
+		-CALLBACK     => sub {
+			my ($row, $sth) = @_;
+			#Then do something with sth
+			return {A=>$row->[0], B=>$row->[1], C=>$row->[2]};
+		}
+	);
+
+Any arguments to bind to the incoming statement. This can be a set of scalars
+or a 2D array if you need to specify any kind of types of sql objects i.e.
+
+	use DBI qw(:sql_types);
+
+	my $conn = get_conn();
+	my $arr_ref = $conn->execute(
+		-SQL => 'select a,b,c from tab where col =? and num_col=? and other=?',
+		-USE_HASHREFS => 1,
+		-CALLBACK => sub {
+			my @row = @{shift @_};
+			return {A=>$row[0], B=>$row[1], C=>$row[2]};
+		},
+		-PARAMS => ['1', SQL_VARCHAR], [2, SQL_INTEGER], 'hello'
+	);
+
+Here we import DBI's sql types into our package and then pass in multiple
+anonymous array references as parameters. Each param is tested in the input
+and if it is detected to be an ARRAY reference we dereference the array and
+run DBI's bind_param method. In fact you can see each part of the incoming
+paramaters array as the contents to call C<bind_param> with. The only difference
+is the package tracks the bind position for you.
+
+=cut
+
+sub execute {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
+	my $has_return = 1;
+	
+	#If no callback then we execute using a default one which returns a 2D array
+	if(!defined $callback) {
+    throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs;
+    $callback = $self->_mappers()->{array_ref};
+	}
+	
+	return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params );
+}
+
+=pod
+
+=head2 execute_simple()
+
+  Arg [SQL]           : SQL to execute
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : 1D array of data points
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+  my $classification = $helper->execute_simple(
+    -SQL    => 'select meta_val from meta where meta_key =? order by meta_id', 
+    -PARAMS => ['species.classification']
+  );
+
+Identical to C<execute> except you do not specify a sub-routine reference. 
+Using this code assumes you want an array of single scalar values as returned 
+by the given SQL statement.
+
+=cut
+
+sub execute_simple {
+  my ( $self, @args ) = @_;
+	my ($sql, $params) = rearrange([qw(sql params)], @args);
+	my $has_return = 1;
+	my $use_hashrefs = 0;
+	my $callback = $self->_mappers()->{first_element};
+	return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params);
+}
+
+=pod
+
+=head2 execute_no_return()
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point;
+                        we assume you are assigning into a data structure which
+                        has requirements other than simple translation into an
+                        array
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype : None
+  Exceptions : If errors occur in the execution of the SQL
+  Status     : Stable
+
+Whilst all other execute methods will return something; this assumes that the
+given mapper subroutine will be performing the business of placing values
+somewhere or doing something with them.
+
+There is a huge temptation to nest queries using this method; do not! Execute
+the values into an array using one of the other methods then run your subqueries
+on them; or make a better first query. SQL is flexible; so use it.
+
+=cut
+
+sub execute_no_return {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
+	throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback;
+	my $has_return = 0;
+	$self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params );
+	return;
+}
+
+=pod
+
+=head2 execute_into_hash()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for mapping to a value in a hash
+                    keyed by the first element in your result set; 
+                    leave blank for a default mapping to a scalar value
+                    of the second element
+  Arg [PARAMS]    : The binding parameters to the SQL statement
+  Returntype      : A HashRef keyed by column 1 & value is the return of
+                    the callback
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+A variant of the execute methods but rather than returning a list of mapped
+results this will assume the first column of a returning map & the calling
+subroutine will map the remainder of your return as the hash's key. For example:
+
+	my $sql = 'select key, one, two from table where something =?';
+	my $mapper = sub {
+		my ($row) = @_;
+		#Ignore field 0 as that is being used for the key
+		my $obj = Some::Obj->new(one=>$row->[1], two=>$row->[2]);
+		return $obj;
+	};
+
+	my $hash = $helper->execute_into_hash(-SQL => $sql, -CALLBACK => $mapper, -PARAMS => ['val']);
+	
+	#Or for a more simple usage
+	my $sql = 'select biotype, count(gene_id) from gene group by biotype';
+	my $biotype_hash = $conn->execute_into_hash(-SQL => $sql);
+	print $biotype_hash->{protein_coding} || 0, "\n";
+
+The basic pattern assumes a scenario where you are mapping in a one key to
+one value. For more advanced mapping techniques you need to start using the
+non-consuming executes which allow you to process a result set without assuming
+that you want to map the rows into single objects.
+
+B<Remember that the row you are given is the full row & not a view of the
+reminaing fields.> Therefore indexing for the data you are concerned with
+begins at position 1.
+
+=cut
+
+sub execute_into_hash {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
+	my $hash = {};
+	
+	#If no callback then we execute using a default one which sets value to 2nd element
+	if(!defined $callback) {
+	 $callback = $self->_mappers()->{second_element};
+	}
+	
+	#Default mapper uses the 1st key + something else from the mapper
+	my $mapper = sub {
+		my $row   = shift @_;
+		my $value = $callback->($row);
+		$hash->{ $row->[0] } = $value;
+		return;
+	};
+	
+	$self->execute_no_return(
+	  -SQL => $sql, 
+	  -CALLBACK => $mapper,
+	  -PARAMS => $params
+	);
+	
+	return $hash;
+}
+
+=pod
+
+=head2 execute_single_result()
+
+  Arg [SQL]           : SQL to execute
+  Arg [CALLBACK]      : The callback to use for mapping a row to a data point; 
+                        leave blank for a default scalar mapping
+  Arg [USE_HASHREFS]  : If set to true will cause HashRefs to be returned 
+                        to the callback & not ArrayRefs
+  Arg [PARAMS]        : The binding parameters to the SQL statement
+  Returntype          : One data point
+  Exceptions          : If errors occur in the execution of the SQL
+  Status              : Stable
+
+  my $meta_count = $helper->execute_single_result(
+    -SQL => 'select count(*) from meta where species_id =?', 
+    -PARAMS => [1]
+  );
+
+Very similar to C<execute()> except it will raise an exception if we have more 
+or less than one row returned
+
+=cut
+
+sub execute_single_result {
+	my ( $self, @args ) = @_;
+	my ($sql, $callback, $use_hashrefs, $params) = rearrange(
+	 [qw(sql callback use_hashrefs params)], @args);
+	
+	my $results = $self->execute_simple( 
+	  -SQL => $sql, 
+	  -CALLBACK => $callback, 
+	  -USE_HASHREFS => $use_hashrefs, 
+	  -PARAMS => $params
+	);
+	
+	my $result_count = scalar(@{$results});
+	if($result_count != 1) {
+	  $params = [] if ! $params;
+	  my $type = ($result_count == 0) ? 'No' : 'Too many';
+		my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params [";
+		$msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} );
+		$msg .= ']';
+		throw($msg);
+	}
+	return $results->[0];
+}
+
+=pod
+
+=head2 transaction()
+
+  Arg [CALLBACK]  : The callback used for transaction isolation; once 
+                    the subroutine exists the code will decide on rollback
+                    or commit
+  Returntype      : Return of the callback
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+  my $val = $helper->transaction(-CALLBACK => sub {
+    my ($dbc) = @_;
+    #Do something
+    return 1;
+  });
+  
+  #Or because of the arguments method we use
+  my $val = $helper->transaction(sub {
+    my ($dbc) = @_;
+    #Do something
+    return 1;
+  });
+
+Creates a transactional block which will ensure that the connection is committed
+when your submmited subroutine has finished or will rollback in the event of
+an error occuring in your block.
+
+The code will always force AutoCommit off but will restore it to its 
+previous setting. If your DBI/DBD driver does not support manual commits
+then this code will break. The code will turn off the 
+C<disconnect_when_idle()> method to allow transactions to work as expected.
+
+Creating a transaction within a transaction results in the commit rollback 
+statements occuring in the top level transaction. That way any block of 
+code which is meant to to be transaction can be wrapped in this block (
+assuming the same instance of SQLHelper is passed around & used).
+
+=cut
+
+sub transaction {
+  my ($self, @args) = @_;
+  
+  my ($callback) = rearrange([qw(callback)], @args);
+  
+  throw('Callback was not a CodeRef. Got a reference of type ['.ref($callback).']') 
+    unless check_ref($callback, 'CODE');
+ 
+  my $dbc = $self->db_connection();
+  my $original_dwi;
+  my $ac;
+  
+  #If we were already in a transaction then we do not do any management of the
+  #session & wait for the parent transaction(s) to finish
+  my $perform_transaction = $self->_perform_transaction_code();
+  if($perform_transaction) {
+    $original_dwi = $dbc->disconnect_when_inactive();
+    $ac = $dbc->db_handle()->{'AutoCommit'};
+    $dbc->db_handle()->{'AutoCommit'} = 0;
+    $self->_enable_transaction();
+  }
+  
+  my $error;
+  my $result;
+  
+  eval {
+    $result = $callback->($dbc);
+    $dbc->db_handle()->commit() if $perform_transaction;
+  };
+  $error = $@;
+  
+  if($perform_transaction) {
+    if($error) {
+      eval { $dbc->db_handle()->rollback(); };
+    }
+    $dbc->db_handle()->{'AutoCommit'} = $ac;
+    $dbc->disconnect_when_inactive($original_dwi);
+    $self->_disable_transaction();
+  }
+  
+  throw("Transaction aborted because of error: ${error}") if $error;
+  
+  return $result;
+}
+
+=pod
+
+=head2 execute_update()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for calling methods on the 
+                    DBI statement handle or DBConnection object after an 
+                    update command
+  Arg [PARAMS]    : The binding parameters to the SQL statement
+  Returntype      : Number of rows affected
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+Used for performing updates but conforms to the normal execute statement
+subroutines.
+
+	use DBI qw(:sql_types);
+	$helper->execute_update(
+	 -SQL => 'update tab set name = ? where id =?', 
+	 -PARAMS => ['andy', [1, SQL_INTEGER]]
+	);
+
+If you need to do something a bit more advanced with your DML then you can
+give the method a closure and this will be called after the execute has been
+issued i.e.
+
+	my $obj;
+	$helper->execute_update(
+	 -SQL => 'insert into tab (name) values(?)', 
+	 -CALLBACK => sub {
+	   my ($sth, $dbc) = @_;
+	   $obj->{id} = $dbh->{mysql_insertid);
+   }, 
+   -PARAMS => [$obj->name()]
+  );
+
+This lets us access the statement handle & database handle to access other
+properties such as the last identifier inserted.
+
+=cut
+
+sub execute_update {
+  my ($self, @args) = @_;
+  my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
+  my $rv = 0;
+  my $sth;
+  eval {
+    $sth = $self->db_connection()->prepare($sql);
+    $self->_bind_params($sth, $params);
+    $rv = $sth->execute();
+    $callback->($sth, $self->db_connection()) if $callback;
+  };
+  my $error = $@;
+  $self->_finish_sth($sth);
+  if($error) {
+    my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params};
+    throw("Cannot apply sql '${sql}' with params '${params}': ${error}");
+  }
+  return $rv;
+}
+
+=pod
+
+=head1 batch()
+
+  Arg [SQL]       : SQL to execute
+  Arg [CALLBACK]  : The callback to use for binding the data to execute in
+                    the batch statement; optional (if you specify DATA)
+  Arg [DATA]      : The data to use in the batch statement; optional (if you
+                    specify CALLBACK)
+  Returntype      : Number of rows affected
+  Exceptions      : If errors occur in the execution of the SQL
+  Status          : Stable
+
+  my $alotofdata = getitfromsomewhere();
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -CALLBACk => sub {
+    my ($sth, $dbc) = @_;
+    foreach my $data (@alotofdata) {
+      $sth->execute(@{$data});
+    }
+  });
+  
+  #Or for a 2D array data driven approach
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -DATA => $alotofdata);
+
+Takes in a sql statement & a code reference. Your SQL is converted into a 
+prepared statement & then given as the first parameter to the closure. The
+second parameter is the DBH which created the statement. This is intended
+to let you do mass insertion into a database without the need to
+re-preparing the same statement.
+
+This can be combined with the transaction() code to provide a construct
+which does batch insertion & is transactionally aware.
+
+We can also use data based batch insertions i.e.
+
+  #Needs to be like:
+  #   [ [1,2], [3,4] ]
+  #Or if using the DBI types: 
+  #   [ [ [1, SQL_INTEGER], [2, SQL_INTEGER] ], [ [3, SQL_INTEGER], [4, SQL_INTEGER] ] ]
+  my $alotofdata = getitfromsomewhere(); 
+  $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', 
+    -DATA => $alotofdata);
+  
+This does exactly what the previous example.
+
+All batch statements will return the value the callback computes. If you are 
+using the previous example with a data array then the code will return the
+number affected rows by the query. 
+
+=cut
+
+sub batch {
+  my ($self, @args) = @_;
+  my ($sql, $callback, $data) = rearrange([qw(sql callback data)], @args);
+  
+  if(! defined $callback && ! defined $data) {
+    throw('You need to define a callback for insertion work or the 2D data array');
+  }
+  
+  my $result;
+  if(defined $callback) {
+    $result = $self->_callback_batch($sql, $callback);
+  }
+  else {
+    $result = $self->_data_batch($sql, $data);
+  }
+  return $result if defined $result;
+  return;
+}
+
+#------- Internal methods
+
+sub _mappers {
+  my ($self) = @_;
+  if(! exists $self->{_mappers}) {
+    $self->{_mappers} = {
+      first_element => sub {
+        my ($row) = @_;
+        return $row->[0];
+      },
+      second_element => sub {
+        my ($row) = @_;
+        return $row->[1];
+      },
+      array_ref => sub {
+        my $row = shift @_;
+	     return [@{$row}];
+      }
+    };
+  }
+  return $self->{_mappers};
+}
+
+sub _perform_transaction_code {
+  my ($self) = @_;
+  return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1;
+}
+
+sub _enable_transaction {
+  my ($self) = @_;
+  $self->{_transaction_active}->{$PROCESS_ID} = 1;
+  return;
+}
+
+sub _disable_transaction {
+  my ($self) = @_;
+  delete $self->{_transaction_active}->{$PROCESS_ID};
+  return;
+}
+
+sub _bind_params {
+	my ( $self, $sth, $params ) = @_;
+	
+	return if ! defined $params; #Return quickly if we had no data
+	
+	if(! check_ref($params, 'ARRAY')) {
+	  throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef});
+	}
+	
+	my $count = 1;
+	foreach my $param (@{$params}) {
+		if ( check_ref($param, 'ARRAY') ) {
+			$sth->bind_param( $count, @{$param} );
+		}
+		else {
+			$sth->bind_param( $count, $param );
+		}
+		$count++;
+	}
+	return;
+}
+
+sub _execute {
+	my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params ) = @_;
+	
+	throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
+	
+	$params = [] unless $params;
+	
+	my $conn = $self->db_connection;
+	my @results;
+
+	my $error;
+	my $sth_close_error;
+	my $sth;
+
+	eval {
+		$sth = $conn->prepare($sql);
+		throw("Cannot continue as prepare() did not return a handle") unless $sth;
+		$self->_bind_params( $sth, $params );
+		$sth->execute();
+		if($use_hashrefs) {
+		  while( my $row = $sth->fetchrow_hashref() ) {
+		    push(@results, $callback->($row, $sth));
+		  }
+		}
+		else {
+      while ( my $row = $sth->fetchrow_arrayref() ) {
+  			push(@results, $callback->($row, $sth));
+  		}
+		}
+	};
+	
+	$error = $@;
+	$self->_finish_sth($sth);
+	if($error) {
+  	throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error;
+	}
+	return \@results if $has_return;
+	return;
+}
+
+sub _finish_sth {
+  my ($self, $sth) = @_;
+  eval { $sth->finish() if defined $sth; };
+  warning('Cannot finish() the statement handle: $@') if $@;
+  return;
+}
+
+sub _callback_batch {
+  my ($self, $sql, $callback) = @_;
+  my $error;
+  my $sth;
+  my $closure_return;
+  eval {
+    $sth = $self->db_connection()->prepare($sql); 
+    $closure_return = $callback->($sth, $self->db_connection());
+  };
+  $error = $@;
+  $self->_finish_sth($sth);
+	throw("Problem detected during batch work: $error") if $error;
+  
+  return $closure_return if defined $closure_return;
+  return;
+}
+
+sub _data_batch {
+  my ($self, $sql, $data) = @_;
+  
+  #Input checks
+  assert_ref($data, 'ARRAY');
+  my $data_length = scalar(@{$data});
+  return 0 unless $data_length > 0;
+  my $first_row = $data->[0];
+  throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY');
+
+  my $callback = sub {
+    my ($sth, $dbc) = @_;
+    my $total_affected = 0;
+    #Iterate over each data point
+    for(my $data_index = 0; $data_index < $data_length; $data_index++) {
+      my $row = $data->[$data_index];
+      $self->_bind_params($sth, $row);
+      my $affected = eval {$sth->execute()};
+      if($@) {
+        throw("Problem working with $sql with params @{$row}: $@");
+      }
+      my $num_affected = ($affected) ? $affected :  0; #Get around DBI's 0E0
+      $total_affected += $num_affected;
+    }
+    return $total_affected || 0;
+  };
+  
+  return $self->_callback_batch($sql, $callback)
+}
+
+1;
\ No newline at end of file
--- a/modules/t/sqlHelper.t
+++ b/modules/t/sqlHelper.t
+#A set tests used to prod the SqlHelper class
+
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Exception;
+use Scalar::Util qw(isweak);
+
+use Bio::EnsEMBL::Test::MultiTestDB;
+use Bio::EnsEMBL::Test::TestUtils;
+use Bio::EnsEMBL::Utils::SqlHelper;
+
+my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
+my $dba = $multi->get_DBAdaptor( 'core' );
+ok( $dba, 'Test database instatiated' );
+
+#Now start testing the Helper
+dies_ok { Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba) } 
+  'Expect to die when we do not give SqlHelper a DBConncetion'; #was given a DBAdaptor
+ok ( 
+  isweak(Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc())->{db_connection}),
+  'Checking DBConnection reference is weak when we ask for it' 
+);
+
+my $helper = Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc());
+ok ( $helper, 'SqlHelper instance was created' );
+
+
+my $meta_key = 'species.common_name';
+diag("Meta key queries working with ${meta_key}. If the tests fail then check for it in the DB dumps");
+
+is( 
+  $helper->execute_single_result(-SQL => qq{select count(*) from meta where meta_key = '$meta_key'}),
+  1,
+  'Checking count of meta key is right with no params'
+);
+
+is( 
+  $helper->execute_single_result(-SQL => 'select count(*) from meta where meta_key =?', -PARAMS => [$meta_key]),
+  1,
+  'Checking count of meta key is right with params'
+);
+
+is_deeply( 
+  $helper->execute(-SQL => 'select count(*), 3 from meta where meta_key =?', -PARAMS => [$meta_key])->[0],
+  [1,3],
+  'Checking 2D mapping of meta key count works'
+);
+
+my $meta_count_hash = $helper->execute_into_hash(
+  -SQL => 'select meta_key, count(*) from meta group by meta_key'
+);
+
+is($meta_count_hash->{$meta_key}, 1, 'Checking hash comes back correctly');
+
+my $meta_table_count = $helper->execute_single_result(-SQL => 'select count(*) from meta');
+my $meta_memoize = $helper->execute(-SQL => 'select * from meta');
+
+is(scalar(@{$meta_memoize}), $meta_table_count, 'All meta items are returned');
+
+$dba->dbc()->do('alter table meta engine=InnoDB');
+
+ok($helper->_perform_transaction_code(), 'This level should do all transaction work');
+
+my $get_value = sub {
+  return $helper->execute_single_result(-SQL => 'select meta_value from meta where meta_key =?', -PARAMS => [$meta_key]);
+};
+
+{
+  #transaction isolation checks
+  throws_ok {
+    $helper->transaction(-CALLBACK => sub {
+      my $sql = 'insert into meta (species_id, meta_key, meta_value) values (?,?,?)';
+      $helper->execute_update(
+        -SQL => $sql,
+        -PARAMS => [2, 'm', '1']
+      );
+      $helper->execute_update(
+        -SQL => $sql,
+        -PARAMS => [2, 'm', '2']
+      );
+      
+      my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]);
+      is($count, 2, 'Count should be 2');
+      die 'Dead now';
+    });
+  }qr/Dead now/, 'Died as expected';
+  my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]);
+  is($count, 0, 'Count should be 0 as we reset the transaction');
+}
+
+#Testing multiple level isolation (or more that the framework ignores them)
+{
+  my $new_meta_value = 'test';
+  throws_ok {
+  $helper->transaction( -CALLBACK => sub {
+    $helper->execute_update(-SQL => 'update meta set meta_value =? where meta_key =?', -PARAMS => [$new_meta_value, $meta_key]);
+    eval {
+      $helper->transaction(-CALLBACK => sub {
+        ok(!$helper->_perform_transaction_code(), 'This level should not be doing any transaction work');
+        die 'This will not cause the transaction to be aborted';
+      });
+    };
+    is($get_value->(), $new_meta_value, 'The die from the prior transaction should not have triggered a rollback');
+    die('Dead now');
+  });
+  } qr/Dead now/, 'Expected die found';
+  
+  isnt($get_value->(), $new_meta_value, 'Meta value is reset as transaction was aborted');
+  
+  $helper->transaction( -CALLBACK => sub {
+    $helper->execute_update(-SQL => 'delete from meta');
+  });
+  
+  $helper->transaction( -CALLBACK => sub {
+    $helper->batch(-SQL => 'insert into meta values (?,?,?,?)', -DATA => $meta_memoize);
+  });
+  
+  my $new_count_hash = $helper->execute_into_hash(
+    -SQL => 'select meta_key, count(*) from meta group by meta_key'
+  );
+  is_deeply($new_count_hash, $meta_count_hash, 'Counts of meta keys should be the same');
+}
+
+#Doing hashref checks
+{
+  my $sql = 'select meta_key, meta_value from meta where meta_key =?';
+  my $callback = sub {
+    my ($row) = @_;
+    return { name => $row->{meta_value} };
+  };
+  my $array_of_hashes = $helper->execute(
+    -SQL => $sql,
+    -CALLBACK => $callback,
+    -USE_HASHREFS => 1,
+    -PARAMS => ['species.common_name']
+  );
+  is_deeply($array_of_hashes, [ { name => 'Human' } ], 'HashRefs in a callback works');
+}
+
+$dba->dbc()->do('alter table meta engine=MyISAM');
+done_testing();
--- a/modules/t/utilsScalar.t
+++ b/modules/t/utilsScalar.t
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Exception;
+
+use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
+use Bio::EnsEMBL::IdMapping::TinyGene;
+
+my $gene = Bio::EnsEMBL::IdMapping::TinyGene->new_fast([]);
+
+dies_ok { assert_ref(undef, 'ARRAY') } 'Undef value results in death';
+dies_ok { assert_ref([], undef) } 'Undef assertion results in death';
+throws_ok { assert_ref('string', 'ARRAY') } qr/produced no type/, 'Passing in a Scalar means death';
+dies_ok { assert_ref(\'', 'ARRAY') } 'Ref of a Scalar is not an ARRAY so death';
+dies_ok { assert_ref($gene, 'CODE') } 'TinyGene object is not a CODE so death';
+dies_ok { assert_ref($gene, 'Bio::EnsEMBL::Feature') } 'TinyGene object is not a Bio::EnsEMBL::Feature so death';
+dies_ok { assert_ref($gene, 'HASH') }  'TinyGene is blessed so we expect false even though it is a HASH';
+
+lives_ok { assert_ref(\'', 'SCALAR') } 'Ref of a Scalar should be a SCALAR';
+lives_ok { assert_ref([], 'ARRAY') } 'Ref of an array should be a ARRAY';
+lives_ok { assert_ref({}, 'HASH') } 'Ref of a hash should be a HASH';
+lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature') } 'Ref of a gene should be a TinyFeature';
+lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene') } 'Ref of a gene should be a TinyGene';
+
+#Now for check_ref
+
+dies_ok { check_ref([], undef) } 'Undef for assertion in check_ref results in death';
+
+ok(! check_ref(undef, 'ARRAY'), 'Undef value returns false');
+ok(! check_ref('string', 'ARRAY'), 'Passing in a Scalar means returns false');
+ok(! check_ref(\'', 'ARRAY'),  'Ref of a Scalar is not an ARRAY so returns false');
+ok(! check_ref($gene, 'CODE'),  'TinyGene object is not a CODE so returns false');
+ok(! check_ref($gene, 'Bio::EnsEMBL::Feature'),  'TinyGene object is not a Bio::EnsEMBL::Feature so returns false');
+ok(! check_ref($gene, 'HASH'),  'TinyGene is blessed so we expect false even though it is a HASH');
+
+ok ( check_ref(\'', 'SCALAR'), 'Ref of a Scalar should be a SCALAR');
+ok ( check_ref([], 'ARRAY'), 'Ref of an array should be a ARRAY');
+ok ( check_ref({}, 'HASH'), 'Ref of a hash should be a HASH');
+ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature'), 'Ref of a gene should be a TinyFeature');
+ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene'), 'Ref of a gene should be a TinyGene');
+
+done_testing();