From a3a9331d29b06b19b80d06394f73e892d99e902d Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Mon, 19 Jul 2010 15:26:46 +0000 Subject: [PATCH] Addition of utility classes for use in the Ensembl API. The first are shortcuts for asserting object/ref types and the second is a class which removes boilerplate from working with databases. --- modules/Bio/EnsEMBL/Utils/Scalar.pm | 137 +++++ modules/Bio/EnsEMBL/Utils/SqlHelper.pm | 806 +++++++++++++++++++++++++ modules/t/sqlHelper.t | 143 +++++ modules/t/utilsScalar.t | 43 ++ 4 files changed, 1129 insertions(+) create mode 100644 modules/Bio/EnsEMBL/Utils/Scalar.pm create mode 100644 modules/Bio/EnsEMBL/Utils/SqlHelper.pm create mode 100644 modules/t/sqlHelper.t create mode 100644 modules/t/utilsScalar.t diff --git a/modules/Bio/EnsEMBL/Utils/Scalar.pm b/modules/Bio/EnsEMBL/Utils/Scalar.pm new file mode 100644 index 0000000000..aba4315997 --- /dev/null +++ b/modules/Bio/EnsEMBL/Utils/Scalar.pm @@ -0,0 +1,137 @@ +package Bio::EnsEMBL::Utils::Scalar; + +=pod + +=head1 LICENSE + + Copyright (c) 1999-2010 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=pod + +=head1 NAME + +Bio::EnsEMBL::Utils::Scalar + +=head1 SYNOPSIS + + use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref); + + check_ref([], 'ARRAY'); # Will return true + check_ref({}, 'ARRAY'); # Will return false + check_ref($dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor'); #Returns true if $dba is a DBAdaptor + + assert_ref([], 'ARRAY'); #Returns true + assert_ref({}, 'ARRAY'); #throws an exception + assert_ref($dba, 'Bio::EnsEMBL::Gene'); #throws an exception if $dba is not a Gene + +=head1 DESCRIPTION + +A collection of subroutines aimed to helping Scalar based operations + +=head1 METHODS + +See subroutines. + +=head1 MAINTAINER + +$Author$ + +=head1 VERSION + +$Revision$ + +=cut + +use strict; +use warnings; + +use base qw(Exporter); +our @EXPORT_OK = qw(check_ref assert_ref); + +use Bio::EnsEMBL::Utils::Exception qw(throw); +use Scalar::Util qw(blessed); + +=head2 check_ref() + + Arg [1] : The reference to check + Arg [2] : The type we expect + Description : A subroutine which checks to see if the given object/ref is + what you expect. If you give it a blessed reference then it + will perform an isa() call on the object after the defined + tests. If it is a plain reference then it will use ref(). + + An undefined value will return a false. + Returntype : Boolean indicating if the reference was the type we + expect + Example : my $ok = check_ref([], 'ARRAY'); + Exceptions : If the expected type was not set + Status : Stable + +=cut + +sub check_ref { + my ($ref, $expected) = @_; + throw('No expected type given') if ! defined $expected; + if(defined $ref) { + if(blessed($ref)) { + return 1 if $ref->isa($expected); + } + else { + my $ref_ref_type = ref($ref); + return 1 if defined $ref_ref_type && $ref_ref_type eq $expected; + } + } + return 0; +} + +=head2 assert_ref() + + Arg [1] : The reference to check + Arg [2] : The type we expect + Description : A subroutine which checks to see if the given object/ref is + what you expect. This behaves in an identical manner as + C<check_ref()> does except this will raise exceptions when + the values do not match rather than returning a boolean + indicating the situation. + + Undefs cause exception circumstances. + Returntype : None + Example : assert_ref([], 'ARRAY'); + Exceptions : If the expected type was not set and if the given reference + was not assignable to the expected value + Status : Stable + +=cut + +sub assert_ref { + my ($ref, $expected) = @_; + throw('No expected type given') if ! defined $expected; + my $class = ref($ref); + throw('Given reference was undef') unless defined $ref; + throw('Asking for the type of the reference produced no type; check your input is a reference') unless $class; + if(blessed($ref)) { + throw("Reference '${class}' is not an ISA of '${expected}'") if ! $ref->isa($expected); + } + else { + throw("'${expected}' expected class was not equal to actual class '${class}'") if $expected ne $class; + } + return 1; +} + +1; diff --git a/modules/Bio/EnsEMBL/Utils/SqlHelper.pm b/modules/Bio/EnsEMBL/Utils/SqlHelper.pm new file mode 100644 index 0000000000..1dc33b80a8 --- /dev/null +++ b/modules/Bio/EnsEMBL/Utils/SqlHelper.pm @@ -0,0 +1,806 @@ +package Bio::EnsEMBL::Utils::SqlHelper; + +=pod + +=head1 LICENSE + + Copyright (c) 1999-2010 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=pod + +=head1 NAME + +Bio::EnsEMBL::Utils::SqlHelper + +=head1 VERSION + +$Revision$ + +=head1 SYNOPSIS + + use Bio::EnsEMBL::Utils::SqlHelper; + + my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc); + my $arr_ref = $helper->execute( + -SQL => 'select name, age from tab where col =?', + -CALLBACK => sub { + my @row = @{shift @_}; + return {name=>$row[0], age=>$row[1]}; + }, + -PARAMS => ['A'] + ); + + use Data::Dumper; + print Dumper($arr_ref), "\n"; + #Prints out [name=>'name', age=>1] maybe .... + + #For transactional work; only works if your MySQL table + #engine/database supports transactional work (such as InnoDB) + $helper->transaction( -CALLBACK => sub { + if($helper->execute_single_result(-SQL => 'select count(*) from tab')) { + return $helper->execute_update('delete from tab); + } + else { + return $helper->batch(-SQL => 'insert into tab (?,?)', -DATA => [ + [1,2], + [1,3], + [1,4] + ]); + } + }); + +=head1 DESCRIPTION + +Easier database interaction + +=head1 COMMITTER + +$Author$ + +=head1 METHODS + +See subrotuines. + +=cut + +use warnings; +use strict; + +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref); +use Bio::EnsEMBL::Utils::Exception qw(throw warning); +use English qw( -no_match_vars ); #Used for $PROCESS_ID +use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection + +=pod + +=head2 new() + + Arg [DB_CONNECTION] : DBConnection instance to use + Returntype : Instance of helper + Exceptions : If the object given as a DBConnection is not one + Status : Stable + +Creates a new instance of this object. + + my $dba = get_dba('mydb'); # New DBAdaptor from somewhere + my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dba->dbc()); + $helper->execute_update(-SQL => 'update tab set flag=?', -PARAMS => [1]); + +=cut + +sub new { + my ( $class, @args ) = @_; + + my ($db_connection) = rearrange([qw(db_connection)], @args); + + my $self = bless( {}, ref($class) || $class ); + $self->db_connection($db_connection); + + return $self; +} + +=pod + + Arg [1] : DBConnection instance to use + Description : Sets and retrieves the DBConnection + Returntype : DBConnection if set; otherwise undef + Exceptions : If the object given as a DBConnection is not one or if an + attempt is made to set the value more than once + Status : Stable + +=cut + +sub db_connection { + my ($self, $db_connection) = @_; + if(defined $db_connection) { + if(exists $self->{db_connection}) { + throw('Cannot reset the DBConnection object; already defined '); + } + assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection'); + $self->{db_connection} = $db_connection; + weaken $self->{db_connection}; + } + return $self->{db_connection}; +} + +# --------- SQL Methods + +=pod + +=head2 execute() - Execute a SQL statement with a custom row handler + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for mapping a row to a data point; + leave blank for a default mapping to a 2D array + Arg [USE_HASHREFS] : If set to true will cause HashRefs to be returned + to the callback & not ArrayRefs + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : 2D array containing the return of the callback + Exceptions : If errors occur in the execution of the SQL + Status : Stable + + my $arr_ref = $helper->execute( + -SQL => 'select a,b,c from tab where col =?', + -CALLBACK => sub { + my @row = @{shift @_}; + return {A=>$row[0], B=>$row[1], C=>$row[2]}; + }, + -PARAMS => ['A'] + ); + + #Or with hashrefs + my $arr_ref = $helper->execute( + -SQL => 'select a,b,c from tab where col =?', + -USE_HASHREFS => 1, + -PARAMS => ['A'], + -CALLBACK => sub { + my $row = shift @_; + return {A=>$row->{a}, B=>$row->{b}, C=>$row->{c}}; + } + ); + +Uses a callback defined by the C<sub> decalaration. Here we specify how the +calling code will deal with each row of a database's result set. The sub +can return any type of Object/hash/data structure you require. + +Should you not specify a callback then a basic one will be assigned to you +which will return a 2D array structure e.g. + + my $arr_ref = $helper->execute( + -SQL => 'select a,b,c from tab where col =?', + -PARAMS => ['A'] + ); + +This is equivalent to DBI's c<selectall_arrayref()> subroutine. + +As an extension to this method you can write a closure subroutine which +takes in two parameters. The first is the array/hash reference & the second is +the statement handle used to execute. 99% of the time you will not need +it but there are occasions where you do need it. An example of usage would be: + + my $conn = get_conn(); #From somwewhere + my $arr_ref = $conn->execute( + -SQL => 'select a,b,c from tab where col =?', + -USE_HASHREFS => 1, + -PARAMS => ['A'], + -CALLBACK => sub { + my ($row, $sth) = @_; + #Then do something with sth + return {A=>$row->[0], B=>$row->[1], C=>$row->[2]}; + } + ); + +Any arguments to bind to the incoming statement. This can be a set of scalars +or a 2D array if you need to specify any kind of types of sql objects i.e. + + use DBI qw(:sql_types); + + my $conn = get_conn(); + my $arr_ref = $conn->execute( + -SQL => 'select a,b,c from tab where col =? and num_col=? and other=?', + -USE_HASHREFS => 1, + -CALLBACK => sub { + my @row = @{shift @_}; + return {A=>$row[0], B=>$row[1], C=>$row[2]}; + }, + -PARAMS => ['1', SQL_VARCHAR], [2, SQL_INTEGER], 'hello' + ); + +Here we import DBI's sql types into our package and then pass in multiple +anonymous array references as parameters. Each param is tested in the input +and if it is detected to be an ARRAY reference we dereference the array and +run DBI's bind_param method. In fact you can see each part of the incoming +paramaters array as the contents to call C<bind_param> with. The only difference +is the package tracks the bind position for you. + +=cut + +sub execute { + my ( $self, @args ) = @_; + my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args); + my $has_return = 1; + + #If no callback then we execute using a default one which returns a 2D array + if(!defined $callback) { + throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs; + $callback = $self->_mappers()->{array_ref}; + } + + return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params ); +} + +=pod + +=head2 execute_simple() + + Arg [SQL] : SQL to execute + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : 1D array of data points + Exceptions : If errors occur in the execution of the SQL + Status : Stable + + my $classification = $helper->execute_simple( + -SQL => 'select meta_val from meta where meta_key =? order by meta_id', + -PARAMS => ['species.classification'] + ); + +Identical to C<execute> except you do not specify a sub-routine reference. +Using this code assumes you want an array of single scalar values as returned +by the given SQL statement. + +=cut + +sub execute_simple { + my ( $self, @args ) = @_; + my ($sql, $params) = rearrange([qw(sql params)], @args); + my $has_return = 1; + my $use_hashrefs = 0; + my $callback = $self->_mappers()->{first_element}; + return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params); +} + +=pod + +=head2 execute_no_return() + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for mapping a row to a data point; + we assume you are assigning into a data structure which + has requirements other than simple translation into an + array + Arg [USE_HASHREFS] : If set to true will cause HashRefs to be returned + to the callback & not ArrayRefs + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : None + Exceptions : If errors occur in the execution of the SQL + Status : Stable + +Whilst all other execute methods will return something; this assumes that the +given mapper subroutine will be performing the business of placing values +somewhere or doing something with them. + +There is a huge temptation to nest queries using this method; do not! Execute +the values into an array using one of the other methods then run your subqueries +on them; or make a better first query. SQL is flexible; so use it. + +=cut + +sub execute_no_return { + my ( $self, @args ) = @_; + my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args); + throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback; + my $has_return = 0; + $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params ); + return; +} + +=pod + +=head2 execute_into_hash() + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for mapping to a value in a hash + keyed by the first element in your result set; + leave blank for a default mapping to a scalar value + of the second element + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : A HashRef keyed by column 1 & value is the return of + the callback + Exceptions : If errors occur in the execution of the SQL + Status : Stable + +A variant of the execute methods but rather than returning a list of mapped +results this will assume the first column of a returning map & the calling +subroutine will map the remainder of your return as the hash's key. For example: + + my $sql = 'select key, one, two from table where something =?'; + my $mapper = sub { + my ($row) = @_; + #Ignore field 0 as that is being used for the key + my $obj = Some::Obj->new(one=>$row->[1], two=>$row->[2]); + return $obj; + }; + + my $hash = $helper->execute_into_hash(-SQL => $sql, -CALLBACK => $mapper, -PARAMS => ['val']); + + #Or for a more simple usage + my $sql = 'select biotype, count(gene_id) from gene group by biotype'; + my $biotype_hash = $conn->execute_into_hash(-SQL => $sql); + print $biotype_hash->{protein_coding} || 0, "\n"; + +The basic pattern assumes a scenario where you are mapping in a one key to +one value. For more advanced mapping techniques you need to start using the +non-consuming executes which allow you to process a result set without assuming +that you want to map the rows into single objects. + +B<Remember that the row you are given is the full row & not a view of the +reminaing fields.> Therefore indexing for the data you are concerned with +begins at position 1. + +=cut + +sub execute_into_hash { + my ( $self, @args ) = @_; + my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args); + my $hash = {}; + + #If no callback then we execute using a default one which sets value to 2nd element + if(!defined $callback) { + $callback = $self->_mappers()->{second_element}; + } + + #Default mapper uses the 1st key + something else from the mapper + my $mapper = sub { + my $row = shift @_; + my $value = $callback->($row); + $hash->{ $row->[0] } = $value; + return; + }; + + $self->execute_no_return( + -SQL => $sql, + -CALLBACK => $mapper, + -PARAMS => $params + ); + + return $hash; +} + +=pod + +=head2 execute_single_result() + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for mapping a row to a data point; + leave blank for a default scalar mapping + Arg [USE_HASHREFS] : If set to true will cause HashRefs to be returned + to the callback & not ArrayRefs + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : One data point + Exceptions : If errors occur in the execution of the SQL + Status : Stable + + my $meta_count = $helper->execute_single_result( + -SQL => 'select count(*) from meta where species_id =?', + -PARAMS => [1] + ); + +Very similar to C<execute()> except it will raise an exception if we have more +or less than one row returned + +=cut + +sub execute_single_result { + my ( $self, @args ) = @_; + my ($sql, $callback, $use_hashrefs, $params) = rearrange( + [qw(sql callback use_hashrefs params)], @args); + + my $results = $self->execute_simple( + -SQL => $sql, + -CALLBACK => $callback, + -USE_HASHREFS => $use_hashrefs, + -PARAMS => $params + ); + + my $result_count = scalar(@{$results}); + if($result_count != 1) { + $params = [] if ! $params; + my $type = ($result_count == 0) ? 'No' : 'Too many'; + my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params ["; + $msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} ); + $msg .= ']'; + throw($msg); + } + return $results->[0]; +} + +=pod + +=head2 transaction() + + Arg [CALLBACK] : The callback used for transaction isolation; once + the subroutine exists the code will decide on rollback + or commit + Returntype : Return of the callback + Exceptions : If errors occur in the execution of the SQL + Status : Stable + + my $val = $helper->transaction(-CALLBACK => sub { + my ($dbc) = @_; + #Do something + return 1; + }); + + #Or because of the arguments method we use + my $val = $helper->transaction(sub { + my ($dbc) = @_; + #Do something + return 1; + }); + +Creates a transactional block which will ensure that the connection is committed +when your submmited subroutine has finished or will rollback in the event of +an error occuring in your block. + +The code will always force AutoCommit off but will restore it to its +previous setting. If your DBI/DBD driver does not support manual commits +then this code will break. The code will turn off the +C<disconnect_when_idle()> method to allow transactions to work as expected. + +Creating a transaction within a transaction results in the commit rollback +statements occuring in the top level transaction. That way any block of +code which is meant to to be transaction can be wrapped in this block ( +assuming the same instance of SQLHelper is passed around & used). + +=cut + +sub transaction { + my ($self, @args) = @_; + + my ($callback) = rearrange([qw(callback)], @args); + + throw('Callback was not a CodeRef. Got a reference of type ['.ref($callback).']') + unless check_ref($callback, 'CODE'); + + my $dbc = $self->db_connection(); + my $original_dwi; + my $ac; + + #If we were already in a transaction then we do not do any management of the + #session & wait for the parent transaction(s) to finish + my $perform_transaction = $self->_perform_transaction_code(); + if($perform_transaction) { + $original_dwi = $dbc->disconnect_when_inactive(); + $ac = $dbc->db_handle()->{'AutoCommit'}; + $dbc->db_handle()->{'AutoCommit'} = 0; + $self->_enable_transaction(); + } + + my $error; + my $result; + + eval { + $result = $callback->($dbc); + $dbc->db_handle()->commit() if $perform_transaction; + }; + $error = $@; + + if($perform_transaction) { + if($error) { + eval { $dbc->db_handle()->rollback(); }; + } + $dbc->db_handle()->{'AutoCommit'} = $ac; + $dbc->disconnect_when_inactive($original_dwi); + $self->_disable_transaction(); + } + + throw("Transaction aborted because of error: ${error}") if $error; + + return $result; +} + +=pod + +=head2 execute_update() + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for calling methods on the + DBI statement handle or DBConnection object after an + update command + Arg [PARAMS] : The binding parameters to the SQL statement + Returntype : Number of rows affected + Exceptions : If errors occur in the execution of the SQL + Status : Stable + +Used for performing updates but conforms to the normal execute statement +subroutines. + + use DBI qw(:sql_types); + $helper->execute_update( + -SQL => 'update tab set name = ? where id =?', + -PARAMS => ['andy', [1, SQL_INTEGER]] + ); + +If you need to do something a bit more advanced with your DML then you can +give the method a closure and this will be called after the execute has been +issued i.e. + + my $obj; + $helper->execute_update( + -SQL => 'insert into tab (name) values(?)', + -CALLBACK => sub { + my ($sth, $dbc) = @_; + $obj->{id} = $dbh->{mysql_insertid); + }, + -PARAMS => [$obj->name()] + ); + +This lets us access the statement handle & database handle to access other +properties such as the last identifier inserted. + +=cut + +sub execute_update { + my ($self, @args) = @_; + my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args); + my $rv = 0; + my $sth; + eval { + $sth = $self->db_connection()->prepare($sql); + $self->_bind_params($sth, $params); + $rv = $sth->execute(); + $callback->($sth, $self->db_connection()) if $callback; + }; + my $error = $@; + $self->_finish_sth($sth); + if($error) { + my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params}; + throw("Cannot apply sql '${sql}' with params '${params}': ${error}"); + } + return $rv; +} + +=pod + +=head1 batch() + + Arg [SQL] : SQL to execute + Arg [CALLBACK] : The callback to use for binding the data to execute in + the batch statement; optional (if you specify DATA) + Arg [DATA] : The data to use in the batch statement; optional (if you + specify CALLBACK) + Returntype : Number of rows affected + Exceptions : If errors occur in the execution of the SQL + Status : Stable + + my $alotofdata = getitfromsomewhere(); + $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -CALLBACk => sub { + my ($sth, $dbc) = @_; + foreach my $data (@alotofdata) { + $sth->execute(@{$data}); + } + }); + + #Or for a 2D array data driven approach + $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', -DATA => $alotofdata); + +Takes in a sql statement & a code reference. Your SQL is converted into a +prepared statement & then given as the first parameter to the closure. The +second parameter is the DBH which created the statement. This is intended +to let you do mass insertion into a database without the need to +re-preparing the same statement. + +This can be combined with the transaction() code to provide a construct +which does batch insertion & is transactionally aware. + +We can also use data based batch insertions i.e. + + #Needs to be like: + # [ [1,2], [3,4] ] + #Or if using the DBI types: + # [ [ [1, SQL_INTEGER], [2, SQL_INTEGER] ], [ [3, SQL_INTEGER], [4, SQL_INTEGER] ] ] + my $alotofdata = getitfromsomewhere(); + $helper->batch(-SQL => 'insert into table (one,two) values(?,?)', + -DATA => $alotofdata); + +This does exactly what the previous example. + +All batch statements will return the value the callback computes. If you are +using the previous example with a data array then the code will return the +number affected rows by the query. + +=cut + +sub batch { + my ($self, @args) = @_; + my ($sql, $callback, $data) = rearrange([qw(sql callback data)], @args); + + if(! defined $callback && ! defined $data) { + throw('You need to define a callback for insertion work or the 2D data array'); + } + + my $result; + if(defined $callback) { + $result = $self->_callback_batch($sql, $callback); + } + else { + $result = $self->_data_batch($sql, $data); + } + return $result if defined $result; + return; +} + +#------- Internal methods + +sub _mappers { + my ($self) = @_; + if(! exists $self->{_mappers}) { + $self->{_mappers} = { + first_element => sub { + my ($row) = @_; + return $row->[0]; + }, + second_element => sub { + my ($row) = @_; + return $row->[1]; + }, + array_ref => sub { + my $row = shift @_; + return [@{$row}]; + } + }; + } + return $self->{_mappers}; +} + +sub _perform_transaction_code { + my ($self) = @_; + return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1; +} + +sub _enable_transaction { + my ($self) = @_; + $self->{_transaction_active}->{$PROCESS_ID} = 1; + return; +} + +sub _disable_transaction { + my ($self) = @_; + delete $self->{_transaction_active}->{$PROCESS_ID}; + return; +} + +sub _bind_params { + my ( $self, $sth, $params ) = @_; + + return if ! defined $params; #Return quickly if we had no data + + if(! check_ref($params, 'ARRAY')) { + throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef}); + } + + my $count = 1; + foreach my $param (@{$params}) { + if ( check_ref($param, 'ARRAY') ) { + $sth->bind_param( $count, @{$param} ); + } + else { + $sth->bind_param( $count, $param ); + } + $count++; + } + return; +} + +sub _execute { + my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params ) = @_; + + throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE'); + + $params = [] unless $params; + + my $conn = $self->db_connection; + my @results; + + my $error; + my $sth_close_error; + my $sth; + + eval { + $sth = $conn->prepare($sql); + throw("Cannot continue as prepare() did not return a handle") unless $sth; + $self->_bind_params( $sth, $params ); + $sth->execute(); + if($use_hashrefs) { + while( my $row = $sth->fetchrow_hashref() ) { + push(@results, $callback->($row, $sth)); + } + } + else { + while ( my $row = $sth->fetchrow_arrayref() ) { + push(@results, $callback->($row, $sth)); + } + } + }; + + $error = $@; + $self->_finish_sth($sth); + if($error) { + throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error; + } + return \@results if $has_return; + return; +} + +sub _finish_sth { + my ($self, $sth) = @_; + eval { $sth->finish() if defined $sth; }; + warning('Cannot finish() the statement handle: $@') if $@; + return; +} + +sub _callback_batch { + my ($self, $sql, $callback) = @_; + my $error; + my $sth; + my $closure_return; + eval { + $sth = $self->db_connection()->prepare($sql); + $closure_return = $callback->($sth, $self->db_connection()); + }; + $error = $@; + $self->_finish_sth($sth); + throw("Problem detected during batch work: $error") if $error; + + return $closure_return if defined $closure_return; + return; +} + +sub _data_batch { + my ($self, $sql, $data) = @_; + + #Input checks + assert_ref($data, 'ARRAY'); + my $data_length = scalar(@{$data}); + return 0 unless $data_length > 0; + my $first_row = $data->[0]; + throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY'); + + my $callback = sub { + my ($sth, $dbc) = @_; + my $total_affected = 0; + #Iterate over each data point + for(my $data_index = 0; $data_index < $data_length; $data_index++) { + my $row = $data->[$data_index]; + $self->_bind_params($sth, $row); + my $affected = eval {$sth->execute()}; + if($@) { + throw("Problem working with $sql with params @{$row}: $@"); + } + my $num_affected = ($affected) ? $affected : 0; #Get around DBI's 0E0 + $total_affected += $num_affected; + } + return $total_affected || 0; + }; + + return $self->_callback_batch($sql, $callback) +} + +1; \ No newline at end of file diff --git a/modules/t/sqlHelper.t b/modules/t/sqlHelper.t new file mode 100644 index 0000000000..4afd4962ae --- /dev/null +++ b/modules/t/sqlHelper.t @@ -0,0 +1,143 @@ +#A set tests used to prod the SqlHelper class + +use strict; +use warnings; + +use Test::More; +use Test::Exception; +use Scalar::Util qw(isweak); + +use Bio::EnsEMBL::Test::MultiTestDB; +use Bio::EnsEMBL::Test::TestUtils; +use Bio::EnsEMBL::Utils::SqlHelper; + +my $multi = Bio::EnsEMBL::Test::MultiTestDB->new(); +my $dba = $multi->get_DBAdaptor( 'core' ); +ok( $dba, 'Test database instatiated' ); + +#Now start testing the Helper +dies_ok { Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba) } + 'Expect to die when we do not give SqlHelper a DBConncetion'; #was given a DBAdaptor +ok ( + isweak(Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc())->{db_connection}), + 'Checking DBConnection reference is weak when we ask for it' +); + +my $helper = Bio::EnsEMBL::DBSQL::SqlHelper->new(-DB_CONNECTION => $dba->dbc()); +ok ( $helper, 'SqlHelper instance was created' ); + + +my $meta_key = 'species.common_name'; +diag("Meta key queries working with ${meta_key}. If the tests fail then check for it in the DB dumps"); + +is( + $helper->execute_single_result(-SQL => qq{select count(*) from meta where meta_key = '$meta_key'}), + 1, + 'Checking count of meta key is right with no params' +); + +is( + $helper->execute_single_result(-SQL => 'select count(*) from meta where meta_key =?', -PARAMS => [$meta_key]), + 1, + 'Checking count of meta key is right with params' +); + +is_deeply( + $helper->execute(-SQL => 'select count(*), 3 from meta where meta_key =?', -PARAMS => [$meta_key])->[0], + [1,3], + 'Checking 2D mapping of meta key count works' +); + +my $meta_count_hash = $helper->execute_into_hash( + -SQL => 'select meta_key, count(*) from meta group by meta_key' +); + +is($meta_count_hash->{$meta_key}, 1, 'Checking hash comes back correctly'); + +my $meta_table_count = $helper->execute_single_result(-SQL => 'select count(*) from meta'); +my $meta_memoize = $helper->execute(-SQL => 'select * from meta'); + +is(scalar(@{$meta_memoize}), $meta_table_count, 'All meta items are returned'); + +$dba->dbc()->do('alter table meta engine=InnoDB'); + +ok($helper->_perform_transaction_code(), 'This level should do all transaction work'); + +my $get_value = sub { + return $helper->execute_single_result(-SQL => 'select meta_value from meta where meta_key =?', -PARAMS => [$meta_key]); +}; + +{ + #transaction isolation checks + throws_ok { + $helper->transaction(-CALLBACK => sub { + my $sql = 'insert into meta (species_id, meta_key, meta_value) values (?,?,?)'; + $helper->execute_update( + -SQL => $sql, + -PARAMS => [2, 'm', '1'] + ); + $helper->execute_update( + -SQL => $sql, + -PARAMS => [2, 'm', '2'] + ); + + my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]); + is($count, 2, 'Count should be 2'); + die 'Dead now'; + }); + }qr/Dead now/, 'Died as expected'; + my $count = $helper->execute_single_result(-SQL => 'select count(*) from meta where species_id =?', -PARAMS => [2]); + is($count, 0, 'Count should be 0 as we reset the transaction'); +} + +#Testing multiple level isolation (or more that the framework ignores them) +{ + my $new_meta_value = 'test'; + throws_ok { + $helper->transaction( -CALLBACK => sub { + $helper->execute_update(-SQL => 'update meta set meta_value =? where meta_key =?', -PARAMS => [$new_meta_value, $meta_key]); + eval { + $helper->transaction(-CALLBACK => sub { + ok(!$helper->_perform_transaction_code(), 'This level should not be doing any transaction work'); + die 'This will not cause the transaction to be aborted'; + }); + }; + is($get_value->(), $new_meta_value, 'The die from the prior transaction should not have triggered a rollback'); + die('Dead now'); + }); + } qr/Dead now/, 'Expected die found'; + + isnt($get_value->(), $new_meta_value, 'Meta value is reset as transaction was aborted'); + + $helper->transaction( -CALLBACK => sub { + $helper->execute_update(-SQL => 'delete from meta'); + }); + + $helper->transaction( -CALLBACK => sub { + $helper->batch(-SQL => 'insert into meta values (?,?,?,?)', -DATA => $meta_memoize); + }); + + my $new_count_hash = $helper->execute_into_hash( + -SQL => 'select meta_key, count(*) from meta group by meta_key' + ); + is_deeply($new_count_hash, $meta_count_hash, 'Counts of meta keys should be the same'); +} + +#Doing hashref checks +{ + my $sql = 'select meta_key, meta_value from meta where meta_key =?'; + my $callback = sub { + my ($row) = @_; + return { name => $row->{meta_value} }; + }; + my $array_of_hashes = $helper->execute( + -SQL => $sql, + -CALLBACK => $callback, + -USE_HASHREFS => 1, + -PARAMS => ['species.common_name'] + ); + is_deeply($array_of_hashes, [ { name => 'Human' } ], 'HashRefs in a callback works'); +} + +$dba->dbc()->do('alter table meta engine=MyISAM'); +done_testing(); diff --git a/modules/t/utilsScalar.t b/modules/t/utilsScalar.t new file mode 100644 index 0000000000..240af5d84a --- /dev/null +++ b/modules/t/utilsScalar.t @@ -0,0 +1,43 @@ +use strict; +use warnings; + +use Test::More; +use Test::Exception; + +use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref); +use Bio::EnsEMBL::IdMapping::TinyGene; + +my $gene = Bio::EnsEMBL::IdMapping::TinyGene->new_fast([]); + +dies_ok { assert_ref(undef, 'ARRAY') } 'Undef value results in death'; +dies_ok { assert_ref([], undef) } 'Undef assertion results in death'; +throws_ok { assert_ref('string', 'ARRAY') } qr/produced no type/, 'Passing in a Scalar means death'; +dies_ok { assert_ref(\'', 'ARRAY') } 'Ref of a Scalar is not an ARRAY so death'; +dies_ok { assert_ref($gene, 'CODE') } 'TinyGene object is not a CODE so death'; +dies_ok { assert_ref($gene, 'Bio::EnsEMBL::Feature') } 'TinyGene object is not a Bio::EnsEMBL::Feature so death'; +dies_ok { assert_ref($gene, 'HASH') } 'TinyGene is blessed so we expect false even though it is a HASH'; + +lives_ok { assert_ref(\'', 'SCALAR') } 'Ref of a Scalar should be a SCALAR'; +lives_ok { assert_ref([], 'ARRAY') } 'Ref of an array should be a ARRAY'; +lives_ok { assert_ref({}, 'HASH') } 'Ref of a hash should be a HASH'; +lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature') } 'Ref of a gene should be a TinyFeature'; +lives_ok { assert_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene') } 'Ref of a gene should be a TinyGene'; + +#Now for check_ref + +dies_ok { check_ref([], undef) } 'Undef for assertion in check_ref results in death'; + +ok(! check_ref(undef, 'ARRAY'), 'Undef value returns false'); +ok(! check_ref('string', 'ARRAY'), 'Passing in a Scalar means returns false'); +ok(! check_ref(\'', 'ARRAY'), 'Ref of a Scalar is not an ARRAY so returns false'); +ok(! check_ref($gene, 'CODE'), 'TinyGene object is not a CODE so returns false'); +ok(! check_ref($gene, 'Bio::EnsEMBL::Feature'), 'TinyGene object is not a Bio::EnsEMBL::Feature so returns false'); +ok(! check_ref($gene, 'HASH'), 'TinyGene is blessed so we expect false even though it is a HASH'); + +ok ( check_ref(\'', 'SCALAR'), 'Ref of a Scalar should be a SCALAR'); +ok ( check_ref([], 'ARRAY'), 'Ref of an array should be a ARRAY'); +ok ( check_ref({}, 'HASH'), 'Ref of a hash should be a HASH'); +ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyFeature'), 'Ref of a gene should be a TinyFeature'); +ok ( check_ref($gene, 'Bio::EnsEMBL::IdMapping::TinyGene'), 'Ref of a gene should be a TinyGene'); + +done_testing(); -- GitLab