From cab090e8d51d81f5e4a457a82775e43f9c514497 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Thu, 22 Sep 2011 09:52:43 +0000
Subject: [PATCH] add_xref now takes a hash instead  of a list for easy
 maintainance. Lots of docs and removal of old subroutines. perlcritic fixes.

---
 .../xref_mapping/XrefParser/BaseParser.pm     | 394 +++++++++---------
 1 file changed, 187 insertions(+), 207 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
index 6ccf5ec05a..26c604f315 100644
--- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
@@ -18,12 +18,6 @@ my $add_dependent_xref_sth = undef;
 my $get_xref_sth = undef;
 my $add_synonym_sth = undef;
 
-#my $dbi;
-#my %dependent_sources;
-#my %taxonomy2species_id;
-#my %species_id2taxonomy;
-#my %name2species_id;
-#my %species_id2name;
 my %xref_dependent_mapped;
 
 
@@ -40,7 +34,7 @@ sub new
   my ($proto, $database, $is_verbose) = @_;
 
   if((!defined $database)){# or (!$database->isa(XrefPArserDatabase)))
-    croak "No database specfied";
+    croak 'No database specfied';
   }
   $verbose = $is_verbose;
   my $dbi = $database->dbi;
@@ -76,8 +70,11 @@ sub get_filehandle
 {
     my ($self, $file_name) = @_;
 
-    my $io;
+    my $io =undef;
 
+    if(!(defined $file_name) or $file_name eq ''){
+      confess "No file name";
+    }
     my $alt_file_name = $file_name;
     $alt_file_name =~ s/\.(gz|Z)$//x;
 
@@ -104,7 +101,7 @@ sub get_filehandle
     if ( !defined $io ) { return }
 
     if ($verbose) {
-      print("Reading from '$file_name'...\n");
+      print "Reading from '$file_name'...\n" || croak 'Could not print out message';
     }
 
     return $io;
@@ -122,9 +119,11 @@ sub get_filehandle
 sub get_source_id_for_source_name {
   my ($self, $source_name,$priority_desc) = @_;
 
-  my $sql = "SELECT source_id FROM source WHERE LOWER(name)='" . lc($source_name) . "'";
-  if(defined($priority_desc)){
-    $sql .= " AND LOWER(priority_description)='".lc($priority_desc)."'";
+  my $low_name = lc $source_name;
+  my $sql = "SELECT source_id FROM source WHERE LOWER(name)='$low_name'";
+  if(defined $priority_desc){
+    $low_name = lc $priority_desc;
+    $sql .= " AND LOWER(priority_description)='$low_name'";
     $source_name .= " ($priority_desc)";
   }
   my $sth = $self->dbi->prepare($sql);
@@ -132,13 +131,13 @@ sub get_source_id_for_source_name {
   my @row = $sth->fetchrow_array();
   my $source_id;
   if (@row) {
-    $source_id = $row[0]; 
+    $source_id = $row[0];
   } else {
     carp "WARNING: There is no entity $source_name in the source-table of the xref database.\n";
     carp "WARNING:. The external db name ($source_name) is hardcoded in the parser\n";
     carp "WARNING: Couldn't get source ID for source name $source_name\n";
 
-    $source_id = -1;
+    $source_id = '-1';
   }
   return $source_id;
 }
@@ -157,7 +156,8 @@ sub get_source_ids_for_source_name_pattern {
 
   my ($self, $source_name) = @_;
 
-  my $sql = "SELECT source_id FROM source WHERE upper(name) LIKE '%".uc($source_name)."%'";
+  my $big_name = uc $source_name;
+  my $sql = "SELECT source_id FROM source WHERE upper(name) LIKE '%${big_name}%'";
 
   my $sth = $self->dbi->prepare($sql);
   my @sources;
@@ -179,18 +179,18 @@ sub get_source_name_for_source_id {
   my ($self, $source_id) = @_;
   my $source_name;
 
-  my $sql = "SELECT name FROM source WHERE source_id= '" . $source_id. "'";
+  my $sql = "SELECT name FROM source WHERE source_id= '$source_id'";
   my $sth = $self->dbi->prepare($sql);
   $sth->execute();
   my @row = $sth->fetchrow_array();
   if (@row) {
-    $source_name = $row[0]; 
+    $source_name = $row[0];
   } else {
     carp "There is no entity with source-id  $source_id  in the source-table of the \n";
     carp "xref-database. The source-id and the name of the source-id is hard-coded in populate_metadata.sql\n" ;
     carp "and in the parser\n";
     carp "Couldn't get source name for source ID $source_id\n";
-    $source_name = -1;
+    $source_name = '-1';
   }
   return $source_name;
 }
@@ -207,30 +207,30 @@ sub get_valid_xrefs_for_dependencies{
   my %dependent_2_xref;
 
 
-  my $sql = "select source_id from source where LOWER(name) =?";
+  my $sql = 'select source_id from source where LOWER(name) =?';
   my $sth = $self->dbi->prepare($sql);
   my @dependent_sources;
-  $sth->execute(lc($dependent_name));
+  $sth->execute(lc $dependent_name);
   while(my @row = $sth->fetchrow_array()){
    push @dependent_sources,$row[0];
   }
 
   my @sources;
   foreach my $name (@reverse_ordered_source_list){
-    $sth->execute(lc($name));
+    $sth->execute(lc $name);
     while(my @row = $sth->fetchrow_array()){
       push @sources,$row[0];
     }
   }
   $sth->finish;
 
-  my $dep_sql = (<<"DSS");
-SELECT d.master_xref_id, x2.accession
-  FROM dependent_xref d, xref x1, xref x2
-    WHERE x1.xref_id = d.master_xref_id AND
-          x1.source_id = ? AND
-          x2.xref_id = d.dependent_xref_id AND
-          x2.source_id = ?
+  my $dep_sql = (<<'DSS');
+  SELECT d.master_xref_id, x2.accession
+    FROM dependent_xref d, xref x1, xref x2
+      WHERE x1.xref_id = d.master_xref_id AND
+            x1.source_id = ? AND
+            x2.xref_id = d.dependent_xref_id AND
+            x2.source_id = ?
 DSS
 
   $sth = $self->dbi->prepare($dep_sql);
@@ -252,28 +252,19 @@ DSS
 # to master_xref_id for all of source names given
 #####################################################
 sub get_valid_xrefs_for_direct_xrefs{
-  my ($self, $direct_name, @list) = @_;
+  my ($self, $direct_name, $separator) = @_;
 
   my %direct_2_xref;
 
 
-  my $sql = "select source_id from source where name like ?";
+  my $sql = 'select source_id from source where name like ?';
   my $sth = $self->dbi->prepare($sql);
   my @direct_sources;
-  $sth->execute($direct_name."%");
+  $sth->execute("${direct_name}%");
   while(my @row = $sth->fetchrow_array()){
     push @direct_sources,$row[0];
   }
 
-  my @sources;
-  foreach my $name (@list){
-    $sth->execute($name);
-    while(my @row = $sth->fetchrow_array()){
-      push @sources,$row[0];
-    }
-  }
-  $sth->finish;
-
   my $gen_sql =(<<"GDS");
 SELECT d.general_xref_id, d.ensembl_stable_id, 'TYPE', d.linkage_xref, x1.accession
   FROM TABLE_direct_xref d, xref x1
@@ -285,18 +276,18 @@ GDS
   my $i=0;
   foreach my $type (qw(Gene Transcript Translation)){
     my $t_sql = $gen_sql;
-    my $table = lc($type);
-    $t_sql =~ s/TABLE/$table/x;
-    $t_sql =~ s/TYPE/$type/x;
+    my $table = lc $type;
+    $t_sql =~ s/TABLE/$table/xsm;
+    $t_sql =~ s/TYPE/$type/xsm;
 
     $sth[$i++] = $self->dbi->prepare($t_sql);
   }
 
   foreach my $d (@direct_sources){
-    for (my $ii =0; $i<3; $i++){
+    for my $ii (0..2) {
       $sth[$ii]->execute($d);
-      while(my @row = $sth[$ii]->fetchrow_array()){
-	$direct_2_xref{$row[4]} = $row[0]."::".$row[1]."::".$row[2]."::".$row[3];
+      while(my ($gen_xref_id, $stable_id, $type, $link, $acc) = $sth[$ii]->fetchrow_array()){
+	$direct_2_xref{$acc} = $gen_xref_id.$separator.$stable_id.$separator.$type.$separator.$link;
       }
     }
   }
@@ -315,7 +306,7 @@ sub label_to_acc{
 
   # First cache synonyms so we can quickly add them later
   my %synonyms;
-  my $syn_sth = $self->dbi->prepare("SELECT xref_id, synonym FROM synonym");
+  my $syn_sth = $self->dbi->prepare('SELECT xref_id, synonym FROM synonym');
   $syn_sth->execute();
 
   my ($xref_id, $synonym);
@@ -329,7 +320,8 @@ sub label_to_acc{
   my %valid_codes;
   my @sources;
 
-  my $sql = "select source_id from source where upper(name) like '%".uc($source_name)."%'";
+  my $big_name = uc $source_name;
+  my $sql = "select source_id from source where upper(name) like '%${big_name}%'";
   my $sth = $self->dbi->prepare($sql);
   $sth->execute();
   while(my @row = $sth->fetchrow_array()){
@@ -368,7 +360,7 @@ sub get_valid_codes{
   # First cache synonyms so we can quickly add them later
   my %synonyms;
   my $dbi = $self->dbi;
-  my $syn_sth = $dbi->prepare("SELECT xref_id, synonym FROM synonym");
+  my $syn_sth = $dbi->prepare('SELECT xref_id, synonym FROM synonym');
   $syn_sth->execute();
 
   my ($xref_id, $synonym);
@@ -382,7 +374,8 @@ sub get_valid_codes{
   my %valid_codes;
   my @sources;
 
-  my $sql = "select source_id from source where upper(name) like '%".uc($source_name)."%'";
+  my $big_name = uc $source_name;
+  my $sql = "select source_id from source where upper(name) like '%$big_name%'";
   my $sth = $dbi->prepare($sql);
   $sth->execute();
   while(my @row = $sth->fetchrow_array()){
@@ -411,33 +404,39 @@ sub get_valid_codes{
 sub upload_xref_object_graphs {
   my ($self, $rxrefs) = @_;
 
-  print "count = ".$#$rxrefs."\n" if($verbose);
+  my $count = scalar @{$rxrefs};
+  if($verbose) {
+    print "count = $count\n" || croak 'Could not print out count';
+  }
 
-  if ($#$rxrefs > -1) {
+  if ($count) {
 
     #################
     # upload new ones
     ##################
-    print "Uploading xrefs\n" if($verbose);
+    if ($verbose) {
+      print "Uploading xrefs\n"
+	|| croak 'Could not print string';
+    }
 
 
     #################################################################################
     # Start of sql needed to add xrefs, primary_xrefs, synonym, dependent_xrefs etc..
     #################################################################################
     my $dbi = $self->dbi;
-    my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type) VALUES(?,?,?,?,?,?,?)");
-    my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?)");
-    my $pri_update_sth = $dbi->prepare("UPDATE primary_xref SET sequence=? WHERE xref_id=?");
-    my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?)");
-    my $dep_sth = $dbi->prepare("INSERT INTO dependent_xref (master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) VALUES(?,?,?,?)");
-    my $xref_update_label_sth = $dbi->prepare("UPDATE xref SET label=? WHERE xref_id=?");
-    my $xref_update_descr_sth = $dbi->prepare("UPDATE xref SET description=? WHERE xref_id=?");
-    my $pair_sth = $dbi->prepare("INSERT INTO pairs VALUES(?,?,?)");
+    my $xref_sth = $dbi->prepare('INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type) VALUES(?,?,?,?,?,?,?)');
+    my $pri_insert_sth = $dbi->prepare('INSERT INTO primary_xref VALUES(?,?,?,?)');
+    my $pri_update_sth = $dbi->prepare('UPDATE primary_xref SET sequence=? WHERE xref_id=?');
+    my $syn_sth = $dbi->prepare('INSERT INTO synonym VALUES(?,?)');
+    my $dep_sth = $dbi->prepare('INSERT INTO dependent_xref (master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) VALUES(?,?,?,?)');
+    my $xref_update_label_sth = $dbi->prepare('UPDATE xref SET label=? WHERE xref_id=?');
+    my $xref_update_descr_sth = $dbi->prepare('UPDATE xref SET description=? WHERE xref_id=?');
+    my $pair_sth = $dbi->prepare('INSERT INTO pairs VALUES(?,?,?)');
 
 
     # disable error handling here as we'll do it ourselves
-    local $xref_sth->{RaiseError} = 0;
-    local $xref_sth->{PrintError} = 0;
+    $xref_sth->{RaiseError} = 0;
+    $xref_sth->{PrintError} = 0;
 
     #################################################################################
     # End of sql needed to add xrefs, primary_xrefs, synonym, dependent_xrefs etc..
@@ -446,8 +445,9 @@ sub upload_xref_object_graphs {
 
     foreach my $xref (@{$rxrefs}) {
        my $xref_id=undef;
-       if(!defined($xref->{ACCESSION})){
-	 print "your xref does not have an accession-number,so it can't be stored in the database\n";
+       if(!(defined $xref->{ACCESSION} )){
+	 print "Your xref does not have an accession-number,so it can't be stored in the database\n"
+	   || croak 'Could not write message';
 	 return;
        }
 
@@ -460,11 +460,11 @@ sub upload_xref_object_graphs {
 			 $xref->{DESCRIPTION},
 			 $xref->{SOURCE_ID},
 			 $xref->{SPECIES_ID},
-			 $xref->{INFO_TYPE} || "MISC")){
+			 $xref->{INFO_TYPE} || 'MISC')){
 	 #
-	 #  if we failed to add the xrefs it must already exist so go find the xref_id for this
+	 #  if we failed to add the xref it must already exist so go find the xref_id for this
 	 #
-	 if(!defined($xref->{SOURCE_ID})){
+	 if(!(defined $xref->{SOURCE_ID})){
 	   print "your xref: $xref->{ACCESSION} does not have a source-id\n";
 	   return;
 	 }
@@ -473,8 +473,12 @@ sub upload_xref_object_graphs {
 					 acc        => $xref->{ACCESSION},
 					 source_id  => $xref->{SOURCE_ID},
 					 species_id => $xref->{SPECIES_ID}} );
-	 $xref_update_label_sth->execute($xref->{LABEL},$xref_id) if (defined($xref->{LABEL}));
-	 $xref_update_descr_sth->execute($xref->{DESCRIPTION},$xref_id,) if (defined($xref->{DESCRIPTION}));
+	 if(defined $xref->{LABEL} ) {
+	   $xref_update_label_sth->execute($xref->{LABEL},$xref_id) ;
+	 }
+	 if(defined $xref->{DESCRIPTION} ){
+	   $xref_update_descr_sth->execute($xref->{DESCRIPTION},$xref_id);
+	 }
        }
        else{
 	 #
@@ -490,7 +494,7 @@ sub upload_xref_object_graphs {
        ################
        # Error checking
        ################
-       if(!(defined($xref_id) and $xref_id)){
+       if(!((defined $xref_id) and $xref_id)){
 	 print STDERR "xref_id is not set for :\n".
 	   "$xref->{ACCESSION}\n$xref->{LABEL}\n".
 	     "$xref->{DESCRIPTION}\n$xref->{SOURCE_ID}\n".
@@ -502,7 +506,7 @@ sub upload_xref_object_graphs {
        # create entry in primary_xref table with sequence; if this is a "cumulative"
        # entry it may already exist, and require an UPDATE rather than an INSERT
        #############################################################################
-       if(defined($xref->{SEQUENCE})){
+       if(defined $xref->{SEQUENCE} ){
 	 if ( $self->primary_xref_id_exists($xref_id) ) {
 	   $pri_update_sth->execute( $xref->{SEQUENCE}, $xref_id )
 	     or croak( $dbi->errstr() );
@@ -526,7 +530,7 @@ sub upload_xref_object_graphs {
        # if there are dependent xrefs, add xrefs and dependent xrefs for them
        #######################################################################
        foreach my $depref (@{$xref->{DEPENDENT_XREFS}}) {
-	 my %dep = %$depref;
+	 my %dep = %{$depref};
 
 	 #################
 	 # Insert the xref
@@ -534,10 +538,10 @@ sub upload_xref_object_graphs {
 	 $xref_sth->execute($dep{ACCESSION},
 			   $dep{VERSION} || 0,
 			   $dep{LABEL} || $dep{ACCESSION},
-			   $dep{DESCRIPTION} || "",
+			   $dep{DESCRIPTION} || '',
 			   $dep{SOURCE_ID},
 			   $xref->{SPECIES_ID},
-                           "DEPENDENT");
+                           'DEPENDENT');
 
 	 #####################################
 	 # find the xref_id for dependent xref
@@ -548,7 +552,7 @@ sub upload_xref_object_graphs {
 						 source_id  => $dep{SOURCE_ID},
 						 species_id => $xref->{SPECIES_ID}} );
 
-	 if(!defined($dep_xref_id) || $dep_xref_id ==0 ){
+	 if(!(defined $dep_xref_id) || $dep_xref_id ==0 ){
 	   print STDERR "acc = $dep{ACCESSION} \nlink = $dep{LINKAGE_SOURCE_ID} \n".$dbi->err."\n";
 	   print STDERR "source = $dep{SOURCE_ID}\n";
 	 }
@@ -574,7 +578,7 @@ sub upload_xref_object_graphs {
        #################################################
        # Add the pair data. refseq dna/pep pairs usually
        #################################################
-       if(defined($xref_id) and defined($xref->{PAIR})){
+       if(defined $xref_id and defined $xref->{PAIR} ){
 	 $pair_sth->execute($xref->{SOURCE_ID},$xref->{ACCESSION},$xref->{PAIR});
        }
 
@@ -582,9 +586,9 @@ sub upload_xref_object_graphs {
        ###########################
        # tidy up statement handles
        ###########################
-       $xref_sth->finish() if defined $xref_sth;
-       $pri_insert_sth->finish() if defined $pri_insert_sth;
-       $pri_update_sth->finish() if defined $pri_update_sth;
+       if(defined $xref_sth) {$xref_sth->finish()};
+       if(defined $pri_insert_sth) {$pri_insert_sth->finish()} ;
+       if(defined $pri_update_sth) {$pri_update_sth->finish()};
 
      }  # foreach xref
 
@@ -599,7 +603,7 @@ sub upload_xref_object_graphs {
 ######################################################################################
 sub upload_direct_xrefs{
   my ($self, $direct_xref)  = @_;
-  for my $dr(@$direct_xref) {
+  for my $dr(@{$direct_xref}) {
 
     ################################################
     # Find the xref_id for this accession and source
@@ -613,8 +617,8 @@ sub upload_direct_xrefs{
       $self->add_direct_xref($general_xref_id, $dr->{ENSEMBL_STABLE_ID},$dr->{ENSEMBL_TYPE},$dr->{LINKAGE_XREF});
     }
     else{
-      print {*STDERR} "Problem Could not find accession ".$dr->{ACCESSION}." for source ".$dr->{SOURCE}.
-	" soo not able ot add direct xref to ".$dr->{ENSEMBL_STABLE_ID}."\n";
+      print {*STDERR} 'Problem Could not find accession '.$dr->{ACCESSION}.' for source '.$dr->{SOURCE}.
+	' so not able to add direct xref to '.$dr->{ENSEMBL_STABLE_ID}."\n";
     }
   }
   return;
@@ -645,9 +649,9 @@ sub get_xref_sources {
 
   my $self = shift;
   my %sourcename_to_sourceid;
-  
+
   my $dbi = $self->dbi;
-  my $sth = $dbi->prepare("SELECT name,source_id FROM source");
+  my $sth = $dbi->prepare('SELECT name,source_id FROM source');
   $sth->execute() or croak( $dbi->errstr() );
   while(my @row = $sth->fetchrow_array()) {
     my $source_name = $row[0];
@@ -669,12 +673,12 @@ sub species_id2taxonomy {
   my %species_id2taxonomy;
   my $dbi = $self->dbi;
 
-  my $sth = $dbi->prepare("SELECT species_id, taxonomy_id FROM species");
+  my $sth = $dbi->prepare('SELECT species_id, taxonomy_id FROM species');
   $sth->execute() or croak( $dbi->errstr() );
   while(my @row = $sth->fetchrow_array()) {
     my $species_id = $row[0];
     my $taxonomy_id = $row[1];
-    if(defined($species_id2taxonomy{$species_id})){
+    if(defined $species_id2taxonomy{$species_id} ){
       push @{$species_id2taxonomy{$species_id}}, $taxonomy_id;
     }
     else{
@@ -695,7 +699,7 @@ sub species_id2name {
   my %species_id2name;
 
   my $dbi = $self->dbi;
-  my $sth = $dbi->prepare("SELECT species_id, name FROM species");
+  my $sth = $dbi->prepare('SELECT species_id, name FROM species');
   $sth->execute() or croak( $dbi->errstr() );
   while ( my @row = $sth->fetchrow_array() ) {
     my $species_id = $row[0];
@@ -706,11 +710,11 @@ sub species_id2name {
   ##############################################
   # Also populate the hash with all the aliases.
   ##############################################
-  $sth = $dbi->prepare("SELECT species_id, aliases FROM species");
+  $sth = $dbi->prepare('SELECT species_id, aliases FROM species');
   $sth->execute() or croak( $dbi->errstr() );
   while ( my @row = $sth->fetchrow_array() ) {
     my $species_id = $row[0];
-    foreach my $name ( split /,\s*/x, $row[1] ) {
+    foreach my $name ( split /,\s*/xms, $row[1] ) {
       $species_id2name{$species_id} ||= [];
       push @{$species_id2name{$species_id}}, $name;
     }
@@ -726,20 +730,19 @@ sub species_id2name {
 ###########################################################################
 sub get_xref_id {
   my ($self, $arg_ref) = @_;
-# $sth, $error, $acc, $source, $species) = @_;
-  my $sth     = $arg_ref->{sth}       || croak "Need a statement handle for get_xref_id";
-  my $acc     = $arg_ref->{acc}       || croak "Need an accession for get_xref_id";
-  my $source  = $arg_ref->{source_id} || croak "Need an source_id for get_xref_id";
-  my $species = $arg_ref->{species_id} || confess "Need an species_id for get_xref_id";
+  my $sth     = $arg_ref->{sth}        || croak 'Need a statement handle for get_xref_id';
+  my $acc     = $arg_ref->{acc}        || croak 'Need an accession for get_xref_id';
+  my $source  = $arg_ref->{source_id}  || croak 'Need an source_id for get_xref_id';
+  my $species = $arg_ref->{species_id} || confess 'Need an species_id for get_xref_id';
   my $error   = $arg_ref->{error};
 
   my $id;
 
-  if ($error and ($error == 1062)) {  # duplicate (okay so get the original) 
+  if ($error and ($error eq '1062')) {  # duplicate (okay so get the original) 
     $id = $self->get_xref($acc, $source, $species);
   }
   elsif ($error){
-    die "Error $error";
+    croak "Error $error";
   }
   else {
     $id = $sth->{'mysql_insertid'};
@@ -759,11 +762,11 @@ sub primary_xref_id_exists {
   my $exists = 0;
 
   my $dbi = $self->dbi;
-  my $sth = $dbi->prepare("SELECT xref_id FROM primary_xref WHERE xref_id=?");
+  my $sth = $dbi->prepare('SELECT xref_id FROM primary_xref WHERE xref_id=?');
   $sth->execute($xref_id) or croak( $dbi->errstr() );
   my @row = $sth->fetchrow_array();
   my $result = $row[0];
-  $exists = 1 if (defined $result);
+  if (defined $result) {$exists = 1; }
 
   return $exists;
 
@@ -794,7 +797,7 @@ sub get_taxonomy_from_species_id{
 sub get_direct_xref{
  my ($self,$stable_id,$type,$link) = @_;
 
- $type = lc($type);
+ $type = lc $type;
 
  my $dbi = $self->dbi;
  my $sql = "select general_xref_id from ${type}_direct_xref d where ensembl_stable_id = ?  and linkage_xref= ?";
@@ -819,8 +822,8 @@ sub get_xref{
   #
   # If the statement handle does nt exist create it.
   #
-  if(!defined($get_xref_sth)){
-    my $sql = "select xref_id from xref where accession = ? and source_id = ? and species_id = ?";
+  if(!(defined $get_xref_sth) ){
+    my $sql = 'select xref_id from xref where accession = ? and source_id = ? and species_id = ?';
     $get_xref_sth = $dbi->prepare($sql);
   }
 
@@ -841,14 +844,22 @@ sub get_xref{
 # else creates it and return the new xre_id
 ###########################################################
 sub add_xref {
-  my ( $self, $acc, $version, $label, $description, $source_id,
-       $species_id, $info_type ) = @_;
+  my ( $self, $arg_ref) = @_;
+
+  my $acc         = $arg_ref->{acc}        || croak 'add_xref needs aa acc';
+  my $source_id   = $arg_ref->{source_id}  || croak 'add_xref needs a source_id';
+  my $species_id  = $arg_ref->{species_id} || croak 'add_xref needs a species_id';
+  my $label       = $arg_ref->{label}      || $acc;
+  my $description = $arg_ref->{desc}       || '';
+  my $version     = $arg_ref->{version}    || 0;
+  my $info_type   = $arg_ref->{info_type}  || 'MISC';
+
 
   ##################################################################
   # See if it already exists. It so return the xref_id for this one.
   ##################################################################
   my $xref_id = $self->get_xref($acc,$source_id, $species_id);
-  if(defined($xref_id)){
+  if(defined $xref_id){
     return $xref_id;
   }
 
@@ -856,22 +867,22 @@ sub add_xref {
   #######################################################################
   # If the statement handle for the insertion of xrefs does not exist yet
   # then create it
-  ########################################################################
-  if ( !defined($add_xref_sth) ) {
+  #######################################################################
+  if (!(defined $add_xref_sth) ) {
     $add_xref_sth =
-      $self->dbi->prepare( "INSERT INTO xref "
-         . "(accession,version,label,description,source_id,species_id, info_type) "
-         . "VALUES(?,?,?,?,?,?,?)" );
+      $self->dbi->prepare( 'INSERT INTO xref '
+         . '(accession,version,label,description,source_id,species_id, info_type) '
+         . 'VALUES(?,?,?,?,?,?,?)' );
   }
 
   ######################################################################
   # If the description is more than 255 characters, chop it off and add
   # an indication that it has been truncated to the end of it.
   ######################################################################
-  if (defined($description) && (length($description) > 255 ) ) {
+  if (defined $description && ((length $description) > 255 ) ) {
     my $truncmsg = ' /.../';
-    substr( $description, 255 - length($truncmsg),
-            length($truncmsg), $truncmsg );
+    substr $description, 255 - (length $truncmsg),
+            length $truncmsg, $truncmsg;
   }
 
 
@@ -893,11 +904,11 @@ sub add_xref {
 sub add_to_direct_xrefs{
   my ($self, $arg_ref) = @_;
 
-  my $stable_id   = $arg_ref->{stable_id}   || croak ("Need a direct_xref on which this xref linked too" );
-  my $type        = $arg_ref->{type}        || croak ("Need a table type on which to add");
-  my $acc         = $arg_ref->{acc}         || croak ("Need an accession of this direct xref" );
-  my $source_id   = $arg_ref->{source_id}   || croak ("Need a source_id for this direct xref" );
-  my $species_id  = $arg_ref->{species_id}  || croak ("Need a species_id for this direct xref" );
+  my $stable_id   = $arg_ref->{stable_id}   || croak ('Need a direct_xref on which this xref linked too' );
+  my $type        = $arg_ref->{type}        || croak ('Need a table type on which to add');
+  my $acc         = $arg_ref->{acc}         || croak ('Need an accession of this direct xref' );
+  my $source_id   = $arg_ref->{source_id}   || croak ('Need a source_id for this direct xref' );
+  my $species_id  = $arg_ref->{species_id}  || croak ('Need a species_id for this direct xref' );
   my $version     = $arg_ref->{version}     || 0;
   my $label       = $arg_ref->{label}       || $acc;
   my $description = $arg_ref->{desc};
@@ -908,11 +919,10 @@ sub add_to_direct_xrefs{
   ######################
   # Get statement handle
   ######################
-  if(!defined($add_xref_sth)){
-    my $sql = (<<"AXX");
-INSERT INTO xref
-  (accession,version,label,description,source_id,species_id, info_type)
-  VALUES (?,?,?,?,?,?,?)
+  if(!(defined $add_xref_sth)){
+    my $sql = (<<'AXX');
+  INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type)
+          VALUES (?,?,?,?,?,?,?)
 AXX
     $add_xref_sth = $dbi->prepare($sql);
   }
@@ -921,10 +931,10 @@ AXX
   # If the acc already has an xrefs find it else cretae a new one
   ###############################################################
   my $direct_id = $self->get_xref($acc, $source_id, $species_id);
-  if(!defined($direct_id)){
+  if(!(defined $direct_id)){
     $add_xref_sth->execute(
         $acc, $version || 0, $label,
-        $description, $source_id, $species_id, "DIRECT"
+        $description, $source_id, $species_id, 'DIRECT'
     ) or croak("$acc\t$label\t\t$source_id\t$species_id\n");
   }
 
@@ -933,7 +943,7 @@ AXX
   #########################
   # Now add the direct info
   #########################
-  $self->add_direct_xref($direct_id, $stable_id, $type, "");
+  $self->add_direct_xref($direct_id, $stable_id, $type, '');
   return;
 }
 
@@ -949,23 +959,23 @@ sub add_direct_xref {
   #######################################################
   # Create statement handles if they do not exist already
   ########################################################
-  if (!defined($add_direct_xref_sth{$ensembl_type})){
-    my $add_gene_direct_xref_sth = $dbi->prepare("INSERT INTO gene_direct_xref VALUES(?,?,?)");
-    my $add_tr_direct_xref_sth = $dbi->prepare("INSERT INTO transcript_direct_xref VALUES(?,?,?)");
-    my $add_tl_direct_xref_sth = $dbi->prepare("INSERT INTO translation_direct_xref VALUES(?,?,?)");
-    $add_direct_xref_sth{"gene"} = $add_gene_direct_xref_sth;
-    $add_direct_xref_sth{"transcript"} = $add_tr_direct_xref_sth;
-    $add_direct_xref_sth{"translation"} = $add_tl_direct_xref_sth;
-    $add_direct_xref_sth{"Gene"} = $add_gene_direct_xref_sth;
-    $add_direct_xref_sth{"Transcript"} = $add_tr_direct_xref_sth;
-    $add_direct_xref_sth{"Translation"} = $add_tl_direct_xref_sth;
+  if (!(defined $add_direct_xref_sth{$ensembl_type})){
+    my $add_gene_direct_xref_sth = $dbi->prepare('INSERT INTO gene_direct_xref VALUES(?,?,?)');
+    my $add_tr_direct_xref_sth = $dbi->prepare('INSERT INTO transcript_direct_xref VALUES(?,?,?)');
+    my $add_tl_direct_xref_sth = $dbi->prepare('INSERT INTO translation_direct_xref VALUES(?,?,?)');
+    $add_direct_xref_sth{'gene'}        =  $add_gene_direct_xref_sth;
+    $add_direct_xref_sth{'transcript'}  = $add_tr_direct_xref_sth;
+    $add_direct_xref_sth{'translation'} = $add_tl_direct_xref_sth;
+    $add_direct_xref_sth{'Gene'}        = $add_gene_direct_xref_sth;
+    $add_direct_xref_sth{'Transcript'}  = $add_tr_direct_xref_sth;
+    $add_direct_xref_sth{'Translation'} = $add_tl_direct_xref_sth;
   }
 
   ##############################
   # Make sure type is recognised
   ##############################
-  if(!defined($add_direct_xref_sth{$ensembl_type})){
-    croak "ERROR add_direct_xref_sth does not exist for $ensembl_type ???\n"; 
+  if(!(defined $add_direct_xref_sth{$ensembl_type})){
+    croak "ERROR add_direct_xref_sth does not exist for $ensembl_type ???";
   }
   else{
     ##########################
@@ -983,10 +993,10 @@ sub add_direct_xref {
 sub add_dependent_xref{
   my ($self, $arg_ref) = @_;
 
-  my $master_xref = $arg_ref->{master_xref_id} || croak( "Need a master_xref_id on which this xref depends on" );
-  my $acc         = $arg_ref->{acc}            || croak( "Need an accession of this dependent xref" );
-  my $source_id   = $arg_ref->{source_id}      || croak( "Need a source_id for this dependent xref" );
-  my $species_id  = $arg_ref->{species_id}     || croak( "Need a species_id for this dependent xref" );
+  my $master_xref = $arg_ref->{master_xref_id} || croak( 'Need a master_xref_id on which this xref depends on' );
+  my $acc         = $arg_ref->{acc}            || croak( 'Need an accession of this dependent xref' );
+  my $source_id   = $arg_ref->{source_id}      || croak( 'Need a source_id for this dependent xref' );
+  my $species_id  = $arg_ref->{species_id}     || croak( 'Need a species_id for this dependent xref' );
   my $version     = $arg_ref->{version}        ||  0;
   my $label       = $arg_ref->{label}          || $acc;
   my $description = $arg_ref->{desc};
@@ -997,16 +1007,16 @@ sub add_dependent_xref{
   ########################################
   # Create/Get the statement handle needed
   ########################################
-  if(!defined($add_xref_sth)){
-    my $sql = (<<"IXR");
+  if(!(defined $add_xref_sth)){
+    my $sql = (<<'IXR');
 INSERT INTO xref
   (accession,version,label,description,source_id,species_id, info_type)
   VALUES (?,?,?,?,?,?,?)
 IXR
     $add_xref_sth = $dbi->prepare($sql);
   }
-  if(!defined($add_dependent_xref_sth)){
-    my $sql = (<<"ADX");
+  if(!(defined $add_dependent_xref_sth)){
+    my $sql = (<<'ADX');
 INSERT INTO dependent_xref 
   (master_xref_id,dependent_xref_id,linkage_annotation,linkage_source_id)
   VALUES (?,?,?,?)
@@ -1019,10 +1029,10 @@ ADX
   # else create it and get the new xref_id
   ####################################################
   my $dependent_id = $self->get_xref($acc, $source_id, $species_id);
-  if(!defined($dependent_id)){
+  if(!(defined $dependent_id)){
     $add_xref_sth->execute(
         $acc, $version, $label,
-        $description, $source_id, $species_id, "DEPENDENT"
+        $description, $source_id, $species_id, 'DEPENDENT'
     ) or croak("$acc\t$label\t\t$source_id\t$species_id\n");
   }
   $dependent_id = $self->get_xref($acc, $source_id, $species_id);
@@ -1030,7 +1040,7 @@ ADX
   ################################################
   # Croak if we have failed to create.get the xref
   ################################################
-  if(!defined($dependent_id)){
+  if(!(defined $dependent_id)){
     croak("$acc\t$label\t\t$source_id\t$species_id\n");
   }
 
@@ -1038,11 +1048,11 @@ ADX
   # If the dependency has not already been set ( is already in hash xref_dependent_mapped)
   # then add it
   ########################################################################################
-  if(!defined($xref_dependent_mapped{$master_xref."|".$dependent_id})){
+  if(!(defined $xref_dependent_mapped{"$master_xref|$dependent_id"})){
     $add_dependent_xref_sth->execute( $master_xref, $dependent_id, $linkage,
 				      $source_id )
       or croak("$master_xref\t$dependent_id\t$linkage\t$source_id");
-    $xref_dependent_mapped{$master_xref."|".$dependent_id} = 1;
+    $xref_dependent_mapped{"$master_xref|$dependent_id"} = 1;
   }
 
   return $dependent_id;
@@ -1057,13 +1067,13 @@ sub add_to_syn_for_mult_sources{
   my ($self, $acc, $sources, $syn, $species_id) = @_;
 
   my $dbi = $self->dbi;
-  if(!defined($add_synonym_sth)){
-    $add_synonym_sth =  $dbi->prepare("INSERT INTO synonym VALUES(?,?)");
+  if(!(defined $add_synonym_sth)){
+    $add_synonym_sth =  $dbi->prepare('INSERT INTO synonym VALUES(?,?)');
   }
 
-  foreach my $source_id (@$sources){
+  foreach my $source_id (@{$sources}){
     my $xref_id = $self->get_xref($acc, $source_id, $species_id);
-    if(defined($xref_id)){
+    if(defined $xref_id){
       $add_synonym_sth->execute( $xref_id, $syn )
         or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
     }
@@ -1079,11 +1089,11 @@ sub add_to_syn{
   my ($self, $acc, $source_id, $syn, $species_id) = @_;
 
   my $dbi = $self->dbi;
-  if(!defined($add_synonym_sth)){
-    $add_synonym_sth =  $dbi->prepare("INSERT INTO synonym VALUES(?,?)");
+  if(!(defined $add_synonym_sth)){
+    $add_synonym_sth =  $dbi->prepare('INSERT INTO synonym VALUES(?,?)');
   }
   my $xref_id = $self->get_xref($acc, $source_id, $species_id);
-  if(defined($xref_id)){
+  if(defined $xref_id){
     $add_synonym_sth->execute( $xref_id, $syn )
       or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
   }
@@ -1102,8 +1112,8 @@ sub add_synonym{
   my ($self, $xref_id, $syn) = @_;
 
   my $dbi=$self->dbi;
-  if(!defined($add_synonym_sth)){
-    $add_synonym_sth =  $dbi->prepare("INSERT INTO synonym VALUES(?,?)");
+  if(!(defined $add_synonym_sth)){
+    $add_synonym_sth =  $dbi->prepare('INSERT INTO synonym VALUES(?,?)');
   }
 
     $add_synonym_sth->execute( $xref_id, $dbi->quote($syn) )
@@ -1127,10 +1137,10 @@ SELECT  xref.accession, xref.label
     WHERE source.name LIKE '$name%' AND
           xref.source_id = source.source_id
 GLA
-  if(defined($prio_desc)){
+  if(defined $prio_desc){
     $sql .= " and source.priority_description like '$prio_desc'";
   }
-  if(defined($species_id)){
+  if(defined $species_id){
     $sql .= " and xref.species_id  = $species_id";
   }
   my $sub_sth = $self->dbi->prepare($sql);
@@ -1153,10 +1163,10 @@ SELECT  xref.accession, synonym.synonym
            xref.source_id = source.source_id
 GLS
 
-  if(defined($prio_desc)){
+  if(defined $prio_desc){
     $sql .= " AND source.priority_description LIKE '$prio_desc'";
   }
-  if(defined($species_id)){
+  if(defined $species_id){
     $sql .= " AND xref.species_id  = $species_id";
   }
   $sub_sth = $self->dbi->prepare($sql);
@@ -1185,10 +1195,10 @@ sub get_label_to_desc{
       WHERE source.name LIKE '$name%' AND 
             xref.source_id = source.source_id
 GDH
-  if(defined($prio_desc)){
+  if(defined $prio_desc){
     $sql .= " and source.priority_description like '$prio_desc'";
   }
-  if(defined($species_id)){
+  if(defined $species_id){
     $sql .= " and xref.species_id  = $species_id";
   }
   my $sub_sth = $self->dbi->prepare($sql);
@@ -1210,10 +1220,10 @@ GDH
              xref.source_id = source.source_id
 GDS
 
-  if(defined($prio_desc)){
+  if(defined $prio_desc){
     $syn_sql .= " AND source.priority_description LIKE '$prio_desc'";
   }
-  if(defined($species_id)){
+  if(defined $species_id){
     $syn_sql .= " AND xref.species_id  = $species_id";
   }
   $sub_sth = $self->dbi->prepare($syn_sql);
@@ -1227,36 +1237,6 @@ GDS
 }
 
 
-#
-#
-#
-#sub get_accession_from_label{
-#  my ($self, $name) = @_;
-
-#  my $sql = "select xref.accession from xref where xref.label like '$name'";
-#  my $sub_sth = $self->dbi->prepare($sql);
-
-#  $sub_sth->execute();
-#  while(my @row = $sub_sth->fetchrow_array()) {
-#    return $row[0];
-#  }
-#  return;
-#}
-
-#sub get_sub_list{
-#  my ($self, $name) = @_;
-#  my @list=();
-
-#  my $sql = "select xref.accession from xref where xref.accession like '$name%'";
-#  my $sub_sth = $self->dbi->prepare($sql);
-
-#  $sub_sth->execute();
-#  while(my @row = $sub_sth->fetchrow_array()) {
-#    push @list, $row[0];
-#  }
-#  return @list;
-#}
-
 ########################################
 # Set release for a particular source_id.
 ########################################
@@ -1264,10 +1244,9 @@ sub set_release{
     my ($self, $source_id, $s_release ) = @_;
 
     my $sth =
-      $self->dbi->prepare(
-        "UPDATE source SET source_release=? WHERE source_id=?");
+      $self->dbi->prepare('UPDATE source SET source_release=? WHERE source_id=?');
 
-    print "Setting release to '$s_release' for source ID '$source_id'\n" if($verbose);
+    if($verbose) { print "Setting release to '$s_release' for source ID '$source_id'\n"; }
 
     $sth->execute( $s_release, $source_id );
     return;
@@ -1294,7 +1273,7 @@ GDM
   my $dependent_xref;
   $sth->bind_columns(\$master_xref,\$dependent_xref);
   while($sth->fetch){
-    $xref_dependent_mapped{$master_xref."|".$dependent_xref}=1;
+    $xref_dependent_mapped{"$master_xref|$dependent_xref"}=1;
   }
   $sth->finish;
   return;
@@ -1310,6 +1289,7 @@ sub get_ext_synonyms{
   my $source_name = shift;
   my %ext_syns;
   my %seen;          # can be in more than once fro each type of external source.
+  my $separator = qw{:};
 
   my $sql =(<<"GES");
   SELECT  x.accession, x.label, sy.synonym
@@ -1326,12 +1306,12 @@ GES
 
   my $count = 0;
   while($sth->fetch){
-    if(!defined($seen{$acc.":".$syn})){
+    if(!(defined $seen{$acc.$separator.$syn})){
       push @{$ext_syns{$acc}}, $syn;
       push @{$ext_syns{$label}}, $syn;
       $count++;
     }
-    $seen{$acc.":".$syn} = 1;
+    $seen{$acc.$separator.$syn} = 1;
   }
   $sth->finish;
 
@@ -1357,17 +1337,17 @@ sub parsing_finished_store_data {
   # identity_xref                               object_xref_id
 
   my %table_and_key =
-    ( 'xref' => "xref_id", 'object_xref' => "object_xref_id" );
+    ( 'xref' => 'xref_id', 'object_xref' => 'object_xref_id' );
 
   foreach my $table ( keys %table_and_key ) {
     my $sth = $self->dbi->prepare(
-             "select MAX(" . $table_and_key{$table} . ") from $table" );
+             'select MAX(' . $table_and_key{$table} . ") from $table" );
     $sth->execute;
     my $max_val;
     $sth->bind_columns( \$max_val );
     $sth->fetch;
     $sth->finish;
-    $self->add_meta_pair( "PARSED_" . $table_and_key{$table},
+    $self->add_meta_pair( 'PARSED_' . $table_and_key{$table},
                           $max_val || 1 );
   }
   return;
-- 
GitLab