From 2b13f775c9f4c7f666a5d69191d2fb30bcb32341 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?=
 <ak4@sanger.ac.uk>
Date: Fri, 23 Feb 2007 15:09:23 +0000
Subject: [PATCH] Allow the Xref parsers to read gzip compressed files through
 a zcat pipe.  Use the command line switch -compressed with xref_parser.pl to
 do this.  The default behaviour (without the -compressed switch) is still to
 uncompress gzipped files.

---
 .../XrefParser/AedesGenBankParser.pm          |  26 ++--
 .../xref_mapping/XrefParser/AgilentParser.pm  |  27 ++---
 .../XrefParser/AnophelesSymbolParser.pm       |  26 ++--
 .../xref_mapping/XrefParser/BaseParser.pm     | 113 +++++++++++++-----
 .../xref_mapping/XrefParser/CCDSParser.pm     |  28 ++---
 .../xref_mapping/XrefParser/CeleraParser.pm   |  22 +---
 .../XrefParser/CeleraProteinParser.pm         |  21 +---
 .../XrefParser/CeleraTranscriptParser.pm      |  21 +---
 .../xref_mapping/XrefParser/CodelinkParser.pm |  24 ++--
 .../xref_mapping/XrefParser/DatabaseParser.pm |  12 +-
 .../XrefParser/EntrezGeneParser.pm            |  31 ++---
 .../xref_mapping/XrefParser/FastaParser.pm    |  14 +--
 .../XrefParser/Flybase_dmel_GFFv3_Parser.pm   |  23 ++--
 .../xref_mapping/XrefParser/GOParser.pm       |  27 ++---
 .../xref_mapping/XrefParser/HUGOParser.pm     |  25 ++--
 .../XrefParser/HUGO_CCDSParser.pm             |  24 ++--
 .../XrefParser/HUGO_ENSGParser.pm             |  26 ++--
 .../xref_mapping/XrefParser/IPIParser.pm      |  26 ++--
 .../xref_mapping/XrefParser/IlluminaParser.pm |  23 +---
 .../xref_mapping/XrefParser/InterproParser.pm |  31 ++---
 .../xref_mapping/XrefParser/JGI_Parser.pm     |  31 ++---
 .../XrefParser/JGI_ProteinParser.pm           |  13 +-
 .../xref_mapping/XrefParser/MGDParser.pm      |  44 +++----
 .../xref_mapping/XrefParser/MIMParser.pm      |  36 +++---
 .../xref_mapping/XrefParser/OTTTParser.pm     |  26 ++--
 .../xref_mapping/XrefParser/RGDParser.pm      |  27 ++---
 .../XrefParser/RefSeqGPFFParser.pm            |  32 ++---
 .../xref_mapping/XrefParser/RefSeqParser.pm   |  48 ++++----
 .../XrefParser/RefSeq_CCDSParser.pm           |  30 ++---
 .../xref_mapping/XrefParser/SGDParser.pm      |  29 ++---
 .../xref_mapping/XrefParser/SegmentParser.pm  |  26 ++--
 .../xref_mapping/XrefParser/UniGeneParser.pm  |  49 ++++----
 .../xref_mapping/XrefParser/UniProtParser.pm  |  38 ++----
 .../XrefParser/UniProtVarSplicParser.pm       |  29 ++---
 .../XrefParser/WilsonAffyParser.pm            |  35 +++---
 .../xref_mapping/XrefParser/WormPepParser.pm  |  25 ++--
 .../WormbaseDatabaseStableIDParser.pm         |  13 +-
 .../XrefParser/XenopusJamboreeParser.pm       |  26 ++--
 .../xref_mapping/XrefParser/ZFINParser.pm     |  46 ++++---
 .../xref_mapping/XrefParser/ncRNAParser.pm    |  26 ++--
 misc-scripts/xref_mapping/xref_parser.pl      |  62 ++++++----
 41 files changed, 508 insertions(+), 753 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm b/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm
index 5769b8da38..7fb3838057 100644
--- a/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm
@@ -3,10 +3,7 @@ package XrefParser::AedesGenBankParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 #Aedes GenBank protein - because not yet in UniProt
 #>EAT48991.1
@@ -25,11 +22,13 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
-    print "Could not open $file\n";
-    return 1;
+  my $file_io = $self->get_filehandle($file);
+  if ( !defined $file_io ) {
+      print "Could not open $file\n";
+      return 1;
   }
-  while (<FILE>) {
+
+  while ( $_ = $file_io->getline() ) {
 
     my $xref;
 
@@ -73,7 +72,7 @@ sub run {
 
   }
 
-  close (FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " AedesGenBank xrefs succesfully parsed\n";
 
@@ -83,13 +82,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::AedesGenBankParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/AgilentParser.pm b/misc-scripts/xref_mapping/XrefParser/AgilentParser.pm
index 0817341726..a9a37f555c 100644
--- a/misc-scripts/xref_mapping/XrefParser/AgilentParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/AgilentParser.pm
@@ -3,10 +3,7 @@ package XrefParser::AgilentParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # OParser for FASTA-format probe mappings from Agilent
 # >A_23_P253586
@@ -22,12 +19,15 @@ sub run {
 
 #  local $/ = "\n>";
 
-  if(!open(AG,"<".$file)){
-    print "Could not open $file\n";
-    return 1;
+  my $ag_io = $self->get_filehandle($file);
+
+  if ( !defined $ag_io ) {
+      print "Could not open $file\n";
+      return 1;
   }
+
   my $probe;
-  while (<AG>) {
+  while ( $_ = $ag_io->getline() ) {
 
     chomp;
 
@@ -57,7 +57,7 @@ sub run {
     }
   }
 
-  close(AG);
+  $ag_io->close();
 
   print scalar(@xrefs) . " Agilent xrefs succesfully parsed\n";
 
@@ -67,13 +67,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::AgilentParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm b/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
index eaf5a838e7..641b724dfb 100644
--- a/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
@@ -3,10 +3,7 @@ package XrefParser::AnophelesSymbolParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # AnophelesSymbol database dump for anopheles - FASTA format
 #
@@ -25,12 +22,14 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
-    print "Could not open $file\n";
-    return 1;
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+      print "Could not open $file\n";
+      return 1;
   }
-  while (<FILE>) {
 
+  while ( $_ = $file_io->getline() ) {
     my $xref;
 
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
@@ -55,7 +54,7 @@ sub run {
 
   }
 
-  close (FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " AnophelesSymbol xrefs succesfully parsed\n";
 
@@ -65,13 +64,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::AnophelesSymbolParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
index 9b368624af..ebcaf7246b 100644
--- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
@@ -27,16 +27,27 @@ my %dependent_sources;
 my %taxonomy2species_id;
 my %name2species_id;
 
-my ($host, $port, $dbname, $user, $pass, $create, $release, $cleanup, $deletedownloaded);
-my ($skipdownload,$drop_db,$checkdownload, $dl_path) ;
+my (
+    $host,         $port,    $dbname,
+    $user,         $pass,    $create,
+    $release,      $cleanup, $deletedownloaded,
+    $skipdownload, $drop_db, $checkdownload,
+    $dl_path,      $compressed
+);
 
 # --------------------------------------------------------------------------------
 # Get info about files to be parsed from the database
 
-sub run {
-
-  ($host, $port, $dbname, $user, $pass, my $speciesr, my $sourcesr, $skipdownload, $checkdownload, 
-    $create, $release, $cleanup, $drop_db, $deletedownloaded, $dl_path, my $notsourcesr) = @_;
+sub run
+{
+    (
+        $host,           $port,             $dbname,
+        $user,           $pass,             my $speciesr,
+        my $sourcesr,    $skipdownload,     $checkdownload,
+        $create,         $release,          $cleanup,
+        $drop_db,        $deletedownloaded, $dl_path,
+        my $notsourcesr, $compressed
+    ) = @_;
 
   $base_dir = $dl_path if $dl_path;
 
@@ -195,8 +206,8 @@ sub run {
 
     if ($checkdownload) {
         my $check_file = $dir . '/' . $file;
-        $check_file =~ s/\.gz$//;
-        $check_file =~ s/\.Z$//;
+
+        if ( !$compressed ) { $check_file =~ s/\.(gz|Z)$// }
 
         print "Checking for file '$check_file'\n";
 
@@ -206,8 +217,7 @@ sub run {
 
             $skipdownload = 1;
 
-            $file =~ s/\.gz$//;
-            $file =~ s/\.Z$//;
+            if ( !$compressed ) { $file =~ s/\.(gz|Z)$// }
         } else {
             print "File '$check_file' does not exist.\n"
               . "Scheduling '$dir/$file' for download...\n";
@@ -265,22 +275,27 @@ sub run {
 	  croak("Could not get $type file $file tried 5 times but failed");
 	}
 
-	# if the file is compressed, the FTP server may or may not have automatically uncompressed it
-	# TODO - read .gz file directly? open (FILE, "zcat $file|") or Compress::Zlib
-	if ($file =~ /(.*)\.gz$/ or $file =~ /(.*)\.Z$/) {
-	  print "Uncompressing $dir/$file\n";
-	  system("gunzip -f $dir/$file");
-	  $file = $1;
-	}
-	if ($file =~ /(.*)\.zip$/) {
-	  print "Unzipping $dir/$file\n";
-	  system("unzip -o -q -d $dir $dir/$file");
-	}
+        # If the file is compressed, the FTP server may or may not have
+        # automatically uncompressed it (it shouldn't have, is this an
+        # historical artifact? (ak)).
+
+        if ( !$compressed && ( $file =~ /\.(gz|Z)$/ ) ) {
+            print "Uncompressing '$dir/$file' using 'gunzip'\n";
+            system( "gunzip", "-f", $dir . '/' . $file );
+        }
 
+        if ( $file =~ /(.*)\.zip$/ ) {
+            print "Uncompressing '$dir/$file' using 'unzip'\n";
+            system( "unzip", "-o", "-q", "-d", $dir,
+                $dir . '/' . $file );
+        }
       }
 
-      $file =~s/\.gz$//; # if skipdownload set this will not have been done yet.
-      $file=~s/\.Z$//;   # if it has no harm done
+      if ( !$compressed ) {
+        $file =~ s/\.(gz|Z)$//;    # If skipdownload set this will
+                                   # not have been done yet.
+                                   # If it has, no harm done
+      }
 
       if ($file_from_archive) {
 	push @new_file, $file_from_archive;
@@ -361,13 +376,57 @@ sub run {
 
 # --------------------------------------------------------------------------------
 
-sub new {
+# Given a file name, returns an IO::File handle opened for reading.  If
+# the name ends in '.gz' or '.Z', the handle is an unseekable stream
+# read from a 'zcat' pipe.  If the named file does not exist, one
+# alternative name is tried: the name with its '.gz' or '.Z' suffix
+# removed or, if it had neither suffix, with '.gz' appended.  Returns
+# undef after writing a warning to stderr if no file could be opened.
+
+sub get_filehandle
+{
+    my ( $self, $file_name ) = @_;
+
+    my $alt_file_name = $file_name;
+    $alt_file_name =~ s/\.(gz|Z)$//;
 
-  my $self = {};
-  bless $self, "BaseParser";
+    if ( $alt_file_name eq $file_name ) { $alt_file_name .= '.gz' }
+
+    if ( !-f $file_name ) {
+        carp(   "File '$file_name' does not exist, "
+              . "will try '$alt_file_name'" );
+        $file_name = $alt_file_name;
+    }
+
+    # Stop here if the alternative is missing too: 'zcat' would still
+    # start and hand back a handle that silently yields no data.
+    if ( !-f $file_name ) {
+        carp("File '$file_name' does not exist");
+        return undef;
+    }
+
+    my $io = IO::File->new();
+    my $opened = ( $file_name =~ /\.(gz|Z)$/ )
+      ? open( $io, '-|', 'zcat', $file_name )    # list form, no shell
+      : open( $io, '<', $file_name );
+    if ( !$opened ) {
+        carp("Can not open file '$file_name': $!");
+        return undef;
+    }
+
-  return $self;
+    print "Reading from '$file_name'...\n";
+
+    return $io;
+}
+
+# --------------------------------------------------------------------------------
+
+sub new
+{
+    my ($proto) = @_;
 
+    my $class = ref $proto || $proto;
+    return bless {}, $class;
 }
 
 # --------------------------------------------------------------------------------
diff --git a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
index 877a5995a3..e5fdc637b6 100644
--- a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
@@ -4,10 +4,7 @@ use strict;
 
 use DBI;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parse file of CCDS records and assign direct xrefs
 # All assumed to be linked to transcripts
@@ -18,17 +15,19 @@ sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
 
-  if(!open(CCDS,"<".$file)){
-    print "Could not open $file\n";
-    return 1;
+  my $ccds_io = $self->get_filehandle($file);
+
+  if ( !defined $ccds_io ) {
+      print "Could not open $file\n";
+      return 1;
   }
+
   my $line_count = 0;
   my $xref_count = 0;
 
   my $xref_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND version=? AND source_id=$source_id AND species_id=$species_id");
 
-  while (<CCDS>) {
-
+  while ( $_ = $ccds_io->getline() ) {
     my ($stable_id, $ccds) = split;
 
     my ($acc, $version) = split (/\./, $ccds);
@@ -48,17 +47,8 @@ sub run {
 
   print "Parsed $line_count CCDS identifiers from $file, added $xref_count xrefs and $line_count direct_xrefs\n";
 
-  close(CCDS);
+  $ccds_io->close();
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::CCDSParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/CeleraParser.pm b/misc-scripts/xref_mapping/XrefParser/CeleraParser.pm
index ab51ee815d..0d65429397 100644
--- a/misc-scripts/xref_mapping/XrefParser/CeleraParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CeleraParser.pm
@@ -3,10 +3,7 @@ package XrefParser::CeleraParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Celera database dump for anopheles - FASTA format
 #
@@ -27,12 +24,14 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
     print "Could not open $file\n";
     return 1;
   }
-  while (<FILE>) {
 
+  while ( $_ = $file_io->getline() ) {
     next if (/^File:/);   # skip header
 
     my $xref;
@@ -69,7 +68,7 @@ sub run {
 
   }
 
-  close (FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " Celera xrefs succesfully parsed\n";
 
@@ -79,13 +78,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::CeleraParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/CeleraProteinParser.pm b/misc-scripts/xref_mapping/XrefParser/CeleraProteinParser.pm
index 1325537fb7..a51cc21403 100644
--- a/misc-scripts/xref_mapping/XrefParser/CeleraProteinParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CeleraProteinParser.pm
@@ -2,26 +2,13 @@ package XrefParser::CeleraProteinParser;
 
 use strict;
 
-use XrefParser::CeleraParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::CeleraParser);
+use base qw( XrefParser::CeleraParser );
 
 # See CeleraParser for details
 
-sub get_sequence_type() {
-
-  return 'peptide';
-
-}
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::CeleraProteinParser";
-  return $self;
-
+sub get_sequence_type()
+{
+    return 'peptide';
 }
 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/CeleraTranscriptParser.pm b/misc-scripts/xref_mapping/XrefParser/CeleraTranscriptParser.pm
index 99a2e8de6e..fb08e9f140 100644
--- a/misc-scripts/xref_mapping/XrefParser/CeleraTranscriptParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CeleraTranscriptParser.pm
@@ -2,26 +2,13 @@ package XrefParser::CeleraTranscriptParser;
 
 use strict;
 
-use XrefParser::CeleraParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::CeleraParser);
+use base qw( XrefParser::CeleraParser );
 
 # See CeleraParser for details
 
-sub get_sequence_type() {
-
-  return 'dna';
-
-}
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::CeleraTranscriptParser";
-  return $self;
-
+sub get_sequence_type()
+{
+    return 'dna';
 }
 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/CodelinkParser.pm b/misc-scripts/xref_mapping/XrefParser/CodelinkParser.pm
index 433ce2e043..061ad01da6 100644
--- a/misc-scripts/xref_mapping/XrefParser/CodelinkParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CodelinkParser.pm
@@ -3,10 +3,7 @@ package XrefParser::CodelinkParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parser for Codelink probes
 
@@ -23,13 +20,13 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(CODELINK,"<".$file)){
+  my $codelink_io = $self->get_filehandle($file);
+  if ( !defined $codelink_io ) {
     print "ERROR: Could not open $file\n";
-    return 1; # 1 = error
+    return 1;    # 1 = error
   }
 
-  while (<CODELINK>) {
-
+  while ( $_ = $codelink_io->getline() ) {
     my $xref;
 
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
@@ -53,6 +50,8 @@ sub run {
 
   }
 
+  $codelink_io->close();
+
   print scalar(@xrefs) . " Codelink xrefs succesfully parsed\n";
 
   XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs);
@@ -61,13 +60,4 @@ sub run {
   return 0; #successful
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::CodelinkParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/DatabaseParser.pm b/misc-scripts/xref_mapping/XrefParser/DatabaseParser.pm
index a63494a011..ac72f18c90 100644
--- a/misc-scripts/xref_mapping/XrefParser/DatabaseParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/DatabaseParser.pm
@@ -3,10 +3,8 @@ package XrefParser::DatabaseParser;
 use strict;
 
 use DBI;
-use XrefParser::BaseParser;
 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Base class for parsers that parse from databases rather than files
 
@@ -58,13 +56,5 @@ sub db {
 
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::DatabaseParser";
-  return $self;
-
-}
-
 1;
 
diff --git a/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm b/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm
index c17780f382..750e46e1d1 100644
--- a/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm
@@ -4,11 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -41,14 +37,15 @@ sub run {
   my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
   
 
-  if(!open(EG,"<".$file)){
-    print  "ERROR: Could not open $file\n";
-    return 1; # 1 is an error
-  }
+    my $eg_io = $self->get_filehandle($file);
+    if ( !defined $eg_io ) {
+        print "ERROR: Could not open $file\n";
+        return 1;    # 1 is an error
+    }
 
   
 
-  my $head = <EG>; # first record are the headers
+  my $head = $eg_io->getline(); # first record are the headers
   chomp $head;
   my (@arr) = split(/\s+/,$head);
   # process this to the correct indexes to use. (incase they change);
@@ -92,7 +89,7 @@ sub run {
   }
   my $xref_count = 0;
   my $syn_count  = 0;
-  while (<EG>) {
+  while ( $_ = $eg_io->getline() ) {
     chomp;
     my (@arr) = split(/\t/,$_);
     if($arr[$gene_tax_id_index] != $species_tax_id){
@@ -110,18 +107,12 @@ sub run {
       $syn_count++;
     }
   }
+
+  $eg_io->close();
+
   print $xref_count." EntrezGene Xrefs added with $syn_count synonyms\n";
   return 0; #successful
 }
 
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::EntrezGeneParser";
-  return $self;
-
-}
  
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/FastaParser.pm b/misc-scripts/xref_mapping/XrefParser/FastaParser.pm
index 44da5797de..ebde3171f0 100644
--- a/misc-scripts/xref_mapping/XrefParser/FastaParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/FastaParser.pm
@@ -4,10 +4,7 @@ use strict;
 use Bio::SeqIO;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Fasta file format, e.g.
 # >foo peptide sequence for the foo gene
@@ -55,13 +52,4 @@ sub run {
   return 0; #successful
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::FastaParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/Flybase_dmel_GFFv3_Parser.pm b/misc-scripts/xref_mapping/XrefParser/Flybase_dmel_GFFv3_Parser.pm
index 653589e35a..31db32db96 100644
--- a/misc-scripts/xref_mapping/XrefParser/Flybase_dmel_GFFv3_Parser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/Flybase_dmel_GFFv3_Parser.pm
@@ -10,14 +10,12 @@
 package XrefParser::Flybase_dmel_GFFv3_Parser;
 
 use strict;
+
 use POSIX qw(strftime);
 use File::Basename;
-use XrefParser::BaseParser;
-use vars qw(@ISA);
 use Bio::EnsEMBL::Utils::Exception;
 
-
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 my %cache_source =();
 
@@ -40,9 +38,8 @@ if (!defined(caller())) {
 # --------------------------------------------------------------------------------
 
 sub new {
-  my ($class,@args) = @_;
-  my $self={};
-  bless $self,$class;
+  my $proto = shift;
+  my $self = $proto->SUPER::new(@_);
 
   $self->external_source_db_name('flybase_gff');
 
@@ -165,11 +162,15 @@ sub create_xrefs {
   my ($self, $flybase_source_id, $file) = @_;
 
   print STDERR "starting to parse $file...." ;
-  if(!open(GFF, $file)){
+
+  my $gff_io = $self->get_filehandle($file);
+
+  if ( !defined $gff_io ) {
     print "ERROR: Can't open the GFF file $file\n";
     return 0;
   }
-  while (<GFF>) {
+
+  while ( $_ = $gff_io->getline() ) {
     chomp;
 	my @col = split /\s+/;
     if($col[3]){
@@ -202,7 +203,9 @@ sub create_xrefs {
     }
 	
   }
-  close (GFF);
+
+  $gff_io->close();
+
   return 1;
 }
 
diff --git a/misc-scripts/xref_mapping/XrefParser/GOParser.pm b/misc-scripts/xref_mapping/XrefParser/GOParser.pm
index 00869d3d6f..eac9d917f3 100644
--- a/misc-scripts/xref_mapping/XrefParser/GOParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/GOParser.pm
@@ -6,12 +6,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
-
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -63,13 +58,16 @@ sub run {
 
   my $count  = 0;
 
-  if(!open(GO,"<".$file)){
+  my $go_io = $self->get_filehandle($file);
+
+  if ( !defined $go_io ) {
     print "ERROR: Could not open $file\n";
-    return 1; # 1 error
+    return 1;    # 1 error
   }
+
   my $taxon_line = "taxon:".$species_id;
   my $miss =0;
-  while (<GO>) {
+  while ( $_ = $go_io->getline() ) {
     if(/$taxon_line/){
       chomp;
       my @array = split (/\t/,$_);
@@ -168,16 +166,11 @@ sub run {
       }
     }
   }
+
+  $go_io->close();
+
   print "\t$count GO dependent xrefs added $refseq_miss refseq not found and $swiss_miss Swissprot not found \n"; 
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::GOParser";
-  return $self;
-
-}
- 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/HUGOParser.pm b/misc-scripts/xref_mapping/XrefParser/HUGOParser.pm
index 1ff87fa540..c207105f6f 100644
--- a/misc-scripts/xref_mapping/XrefParser/HUGOParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/HUGOParser.pm
@@ -3,10 +3,8 @@ package XrefParser::HUGOParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
+use base qw( XrefParser::BaseParser );
 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
 my $xref_sth ;
 my $dep_sth;
 my $syn_sth;
@@ -67,18 +65,20 @@ sub run {
   my $entrezgene_count = 0;
   my $mismatch = 0;
 
-  if(!open (HUGO, "<$file")){
-    print  "ERROR: Can't open HUGO file $file\n";
+  my $hugo_io = $self->get_filehandle($file);
+
+  if ( !defined $hugo_io ) {
+    print "ERROR: Can't open HUGO file $file\n";
     return 1;
   }
 
-  <HUGO>;
+  $_ = $hugo_io->getline();
 
   #23	ABAT	4-aminobutyrate aminotransferase		P80404
   #29	ABCA1	ATP-binding cassette, sub-family A (ABC1), member 1	ABC1, HDLDT1	O95477
   #40	ABCB1	ATP-binding cassette, sub-family B (MDR/TAP), member 1	PGY1, MDR1, CLCS	P-gp, CD243, GP170, ABC20	P08183	NM_000927
 
-  while (<HUGO>) {
+  while ( $_ = $hugo_io->getline() ) {
 
     chomp;
 
@@ -168,7 +168,7 @@ sub run {
 
   } # while HUGO
 
-  close (HUGO);
+  $hugo_io->close();
 
   print "Loaded a total of " . ($swiss_count + $refseq_count + $entrezgene_count) . " HUGO xrefs, $refseq_count from RefSeq curated mappings and $swiss_count from Uniprot (mapped) and $entrezgene_count from EntrezGene mappings\n";
 
@@ -182,15 +182,6 @@ sub rename_url_file{
   return "hugo.txt";
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::HUGOParser";
-  return $self;
-
-}
-
 1;
     
 
diff --git a/misc-scripts/xref_mapping/XrefParser/HUGO_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/HUGO_CCDSParser.pm
index 394cf49af5..ca172bf50f 100644
--- a/misc-scripts/xref_mapping/XrefParser/HUGO_CCDSParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/HUGO_CCDSParser.pm
@@ -4,10 +4,7 @@ use strict;
 
 use DBI;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parse file of HGNC records and assign direct xrefs
 # All assumed to be linked to genes
@@ -16,7 +13,9 @@ sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
 
-  if(!open(HUGO,"<".$file)){
+  my $hugo_io = $self->get_filehandle($file);
+
+  if ( !defined $hugo_io ) {
     print "Could not open $file\n";
     return 1;
   }
@@ -68,7 +67,8 @@ sub run {
   my %seen;
   my $ignore_count = 0;
   my $ignore_examples ="";
-  while(<HUGO>){
+
+  while( $_ = $hugo_io->getline() ) {
     chomp;
     my ($ccds,$hgnc) = split;
     
@@ -96,17 +96,9 @@ sub run {
     print $ignore_count." ignoreed due to numbers no identifiers being no longer valid :- $ignore_examples\n";
   }
 
-  close(HUGO);
-  return 0;
+  $hugo_io->close();
 
-}
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::HUGO_CCDSParser";
-  return $self;
+  return 0;
 
 }
 
diff --git a/misc-scripts/xref_mapping/XrefParser/HUGO_ENSGParser.pm b/misc-scripts/xref_mapping/XrefParser/HUGO_ENSGParser.pm
index af972d8015..664904ff7b 100644
--- a/misc-scripts/xref_mapping/XrefParser/HUGO_ENSGParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/HUGO_ENSGParser.pm
@@ -4,10 +4,7 @@ use strict;
 
 use DBI;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parse file of HGNC records and assign direct xrefs
 # All assumed to be linked to genes
@@ -16,15 +13,16 @@ sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
 
-  if(!open(HUGO,"<".$file)){
+  my $hugo_io = $self->get_filehandle($file);
+
+  if ( !defined $hugo_io ) {
     print "Could not open $file\n";
     return 1;
   }
+
   my $line_count = 0;
   my $xref_count = 0;
 
-
-
   # becouse the direct mapping have no descriptions etc
   # we have to steal these fromt he previous HUGO parser.
   # This is why the order states this is after the other one.
@@ -51,7 +49,8 @@ sub run {
   my $ignore_count = 0;
   my $ignore_examples ="";
   my %acc;
-  while (<HUGO>) {
+
+  while ( $_ = $hugo_io->getline() ) {
 
     my ($hgnc, $stable_id) = split;
 
@@ -79,17 +78,10 @@ sub run {
   if($ignore_count){
     print $ignore_count." ignoreed due to numbers no identifiers being no longer valid :- $ignore_examples\n";
   }
-  close(HUGO);
-  return 0;
-}
-
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::HUGO_ENSGParser";
-  return $self;
+  $hugo_io->close();
 
+  return 0;
 }
 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/IPIParser.pm b/misc-scripts/xref_mapping/XrefParser/IPIParser.pm
index e37d31b080..0f62360f24 100644
--- a/misc-scripts/xref_mapping/XrefParser/IPIParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/IPIParser.pm
@@ -3,10 +3,7 @@ package XrefParser::IPIParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # IPI file format: fasta, e.g.
 # >IPI:IPI00000005.1|SWISS-PROT:P01111|TREMBL:Q15104|REFSEQ_NP:NP_002515|ENSEMBL:ENSP00000261444 Tax_Id=9606 Transforming protein N-Ras
@@ -22,14 +19,16 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(IPI,"<".$file)){
+  my $ipi_io = $self->get_filehandle($file);
+
+  if ( !defined $ipi_io ) {
     print "ERROR: Could not open $file\n";
-    return 1; # 1 = error
+    return 1;    # 1 = error
   }
-  my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
 
-  while (<IPI>) {
+  my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
 
+  while ( $_ = $ipi_io->getline() ) {
     my $xref;
 
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
@@ -64,6 +63,8 @@ sub run {
 
   }
 
+  $ipi_io->close();
+
   print scalar(@xrefs) . " IPI xrefs succesfully parsed\n";
 
   XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs);
@@ -72,13 +73,4 @@ sub run {
   return 0; #successful
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::IPIParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/IlluminaParser.pm b/misc-scripts/xref_mapping/XrefParser/IlluminaParser.pm
index 10be9f5d07..d959b579b0 100644
--- a/misc-scripts/xref_mapping/XrefParser/IlluminaParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/IlluminaParser.pm
@@ -2,10 +2,7 @@ package XrefParser::IlluminaParser;
 
 use strict;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parser for Illumina V2 xrefs - V1 are done by the vanilla FastaParser
 
@@ -21,13 +18,14 @@ sub run {
 
   my @xrefs;
 
-  if(!open(FILE,"<".$file)){
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
     print "Could not open $file\n";
     return 1;
   }
 
-  while (<FILE>) {
-
+  while ( $_ = $file_io->getline() ) {
     chomp;
 
     my $xref;
@@ -63,7 +61,7 @@ sub run {
 
   }
 
-  close(FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " Illumina V2 xrefs succesfully parsed\n";
 
@@ -73,13 +71,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::IlluminaParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/InterproParser.pm b/misc-scripts/xref_mapping/XrefParser/InterproParser.pm
index 31e7706bf6..defaf5fd35 100644
--- a/misc-scripts/xref_mapping/XrefParser/InterproParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/InterproParser.pm
@@ -4,11 +4,8 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
   
-use XrefParser::BaseParser;
+use base qw( XrefParser::BaseParser );
   
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
- 
 my $xref_sth ;
 my $dep_sth;
   
@@ -57,15 +54,18 @@ sub run {
 
 
   my $dir = dirname($file);
-                                                                                                                         
+
   my %short_name;
   my %description;
   my %pfam;
-     
-  if(!open (XML, $dir."/interpro.xml")){
+
+  my $xml_io = $self->get_filehandle( $dir . "/interpro.xml" );
+
+  if ( !defined $xml_io ) {
     print "ERROR: Can't open hugo interpro file $dir/interpro.xml\n";
-    return 1; # 1= error
+    return 1;    # 1= error
   }
+
   #<interpro id="IPR001023" type="Family" short_name="Hsp70" protein_count="1556">
   #    <name>Heat shock protein Hsp70</name>
   #     <db_xref protein_count="18" db="PFAM" dbkey="PF01278" name="Omptin" />
@@ -77,8 +77,8 @@ sub run {
 
   my $last = "";
   my $i =0;
-  while (<XML>) {
 
+  while ( $_ = $xml_io->getline() ) {
     my $interpro;
     my $short_name;
     my $name;
@@ -109,7 +109,8 @@ sub run {
     }
   }
 
-  close (LONG);
+  $xml_io->close();
+
   for my $db ( keys %count ) {
     print "\t".$count{$db}." $db loaded.\n";
   }
@@ -126,14 +127,4 @@ sub get_xref{
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::InterproParser";
-  return $self;
-
-}
- 
 1;
-    
-
diff --git a/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm b/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm
index 58377e3011..4cb15cd0a2 100644
--- a/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm
@@ -3,10 +3,7 @@ package XrefParser::JGI_Parser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # JGI protein file with gene predictons  - FASTA FORMAT  
 #
@@ -50,11 +47,14 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
-    print  "ERROR: Could not open $file\n";
-    return 1; # 1 is an error
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open $file\n";
+    return 1;    # 1 is an error
   }
-  while (<FILE>) {
+
+  while ( $_ = $file_io->getline() ) {
 
     next if (/^File:/);   # skip header
 
@@ -122,7 +122,7 @@ sub run {
 
   }
 
-  close (FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " JGI_ xrefs succesfully parsed\n";
 
@@ -133,11 +133,14 @@ sub run {
 }
 
 
-sub new {
-  my $self = {};
-  bless $self, "XrefParser::JGI_Parser";
-  print "\n\nh ave new jp\n" ; 
-  return $self;
+sub new
+{
+    my $proto = shift;
+    my $self  = $proto->SUPER::new(@_);
+
+    print "\n\nhave new jp\n";
+
+    return $self;
 }
 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/JGI_ProteinParser.pm b/misc-scripts/xref_mapping/XrefParser/JGI_ProteinParser.pm
index 218e8fe9dc..90afb658b7 100644
--- a/misc-scripts/xref_mapping/XrefParser/JGI_ProteinParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/JGI_ProteinParser.pm
@@ -3,21 +3,12 @@ package XrefParser::JGI_ProteinParser;
 
 use strict;
 
-use XrefParser::JGI_Parser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::JGI_Parser);
+use base qw( XrefParser::JGI_Parser );
 
 # See JGI_Parser for details
+
 sub get_sequence_type() {
   return 'peptide';
 }
 
-
-sub new {
-  my $self = {};
-  bless $self, "XrefParser::JGI_ProteinParser";
-  return $self;
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/MGDParser.pm b/misc-scripts/xref_mapping/XrefParser/MGDParser.pm
index c5bb48cb42..85b9aa5dc9 100644
--- a/misc-scripts/xref_mapping/XrefParser/MGDParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/MGDParser.pm
@@ -4,11 +4,8 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
  
-use XrefParser::BaseParser;
+use base qw( XrefParser::BaseParser );
  
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
 my $xref_sth ;
 my $dep_sth;
  
@@ -51,11 +48,14 @@ sub run {
   my $mismatch = 0;
   my %mgi_good;
 
-  if(!open(FILE,"<". $file)){
-    print  "ERROR: Could not open file $file";
-    return 1; # 1 is an error
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open file $file";
+    return 1;    # 1 is an error
   }
-  while(my $line = <FILE>){
+
+  while ( my $line = $file_io->getline() ) {
     chomp $line;
     my ($key,$label,$desc,$sps) = (split("\t",$line))[0,1,3,6];
     my @sp = split(/\s/,$sps); 
@@ -70,19 +70,21 @@ sub run {
       }
     }
   }
-  close FILE;
-
+  $file_io->close();
 
   my $dir = dirname($file);
   my $syn_file = $dir."/MRK_Synonym.sql.rpt";
 
-  if(!open(FILE2,"<". $syn_file)){
-    print  "ERROR: Could not open file $syn_file";
+  $file_io = $self->get_filehandle($syn_file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open file $syn_file";
     return 1;
   }
+
   my $synonyms=0;
 
-  while(<FILE2>){
+  while ( $_ = $file_io->getline() ) {
     if(/MGI:/){
       chomp ;
       my ($key,$syn) = (split)[0,4];
@@ -92,23 +94,15 @@ sub run {
       }
     }
   }
-  close FILE2;
+
+  $file_io->close();
+
   print "\t$count xrefs succesfully loaded\n";
   print "\t$synonyms synonyms successfully loaded\n";
   print "\t$mismatch xrefs failed to load\n";
      
   return 0;
-
-
-}                                                                                                                     
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::MGDParser";
-  return $self;
-
 }
- 
+
 1;
     
diff --git a/misc-scripts/xref_mapping/XrefParser/MIMParser.pm b/misc-scripts/xref_mapping/XrefParser/MIMParser.pm
index a21828179f..c634c09925 100644
--- a/misc-scripts/xref_mapping/XrefParser/MIMParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/MIMParser.pm
@@ -4,11 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -52,16 +48,21 @@ sub run {
     
   local $/ = "*RECORD*";
 
-  if(!open(MIM,"<".$file)){
-    print  "ERROR: Could not open $file\n";
-    return 1; # 1 is an error
+  my $mim_io = $self->get_filehandle($file);
+
+  if ( !defined $mim_io ) {
+    print "ERROR: Could not open $file\n";
+    return 1;    # 1 is an error
   }
-  
+
   my $gene = 0;
   my $phenotype = 0;
   my $removed_count =0;
-  <MIM>; # first record is empty with *RECORD* as the record seperator
-  while (<MIM>) {
+
+  $mim_io->getline();    # first record is empty with *RECORD* as the
+                         # record separator
+
+  while ( $_ = $mim_io->getline() ) {
     #get the MIM number
     my $number = 0;
     my $description = undef;
@@ -101,6 +102,9 @@ sub run {
       }
     }
   }
+
+  $mim_io->close();
+
   my $syn_count =0;
   foreach my $mim (keys %old_to_new){
     my $old= $mim;
@@ -118,14 +122,4 @@ sub run {
   return 0; #successful
 }
 
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::MIMParser";
-  return $self;
-
-}
- 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/OTTTParser.pm b/misc-scripts/xref_mapping/XrefParser/OTTTParser.pm
index 7d188c3a6a..a56b154227 100644
--- a/misc-scripts/xref_mapping/XrefParser/OTTTParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/OTTTParser.pm
@@ -4,10 +4,7 @@ use strict;
 
 use DBI;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parse file of Ensembl - Vega OTTT transcript mappings
 # ENST00000373795:	OTTHUMT00000010392
@@ -19,17 +16,19 @@ sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
 
-  if(!open(OTTT,"<".$file)){
+  my $ottt_io = $self->get_filehandle($file);
+
+  if ( !defined $ottt_io ) {
     print "Could not open $file\n";
     return 1;
   }
+
   my $line_count = 0;
   my $xref_count = 0;
 
   my $xref_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$source_id AND species_id=$species_id");
 
-  while (<OTTT>) {
-
+  while ( $_ = $ottt_io->getline() ) {
     my ($ens, $ottt) = split;
 
     $ens =~ s/://g;
@@ -48,19 +47,12 @@ sub run {
 
   }
 
-  print "Parsed $line_count OTTT identifiers from $file, added $xref_count xrefs and $line_count direct_xrefs\n";
+  $ottt_io->close();
 
-  close(OTTT);
-  return 0;
-}
-
-
-sub new {
+  print "Parsed $line_count OTTT identifiers from $file, added $xref_count xrefs and $line_count direct_xrefs\n";
 
-  my $self = {};
-  bless $self, "XrefParser::OTTTParser";
-  return $self;
 
+  return 0;
 }
 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/RGDParser.pm b/misc-scripts/xref_mapping/XrefParser/RGDParser.pm
index 6eb169a375..8f147c02a8 100644
--- a/misc-scripts/xref_mapping/XrefParser/RGDParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RGDParser.pm
@@ -4,10 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 my $xref_sth ;
 my $dep_sth;
@@ -47,11 +44,14 @@ sub run {
   my (%refseq) = %{XrefParser::BaseParser->get_valid_codes("refseq",$species_id)};
   
 
-  if(!open(RGD,"<".$file)){
+  my $rgd_io = $self->get_filehandle($file);
+
+  if ( !defined $rgd_io ) {
     print "ERROR: Could not open $file\n";
     return 1;
   }
-  my $line = <RGD>;
+
+  my $line = $rgd_io->getline();
   chomp $line;
   my @linearr = split(/\t/,$line);
 
@@ -74,7 +74,7 @@ sub run {
   
   my $count= 0;
   my $mismatch = 0;
-  while ($line = <RGD>) {
+  while ( $line = $rgd_io->getline() ) {
     chomp $line;
     my ($rgd, $symbol, $name, $refseq) = (split (/\t/,$line))[0,1,2,16];
     my @nucs = split(/\,/,$refseq);
@@ -94,6 +94,7 @@ sub run {
 	}
       }
     }
+
     if(!$done){
 #      print STDERR "$rgd FAILED for $failed_list\n";
       $self->add_xref("RGD:".$rgd,"",$symbol,$name,$source_id,$species_id);
@@ -101,18 +102,12 @@ sub run {
     }
 
   }
+
+  $rgd_io->close();
+
   print "\t$count xrefs succesfully loaded and dependent on refseq\n";
   print "\t$mismatch xrefs added but with NO dependencies\n";
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::RGDParser";
-  return $self;
-
-}
- 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
index 9a1392014f..efad047380 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
@@ -6,10 +6,7 @@ use strict;
 
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw( XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -49,8 +46,10 @@ sub run {
   my $pred_dna_source_id = XrefParser::BaseParser->get_source_id_for_source_name('RefSeq_dna_predicted');
   print "RefSeq_peptide_predicted source ID = $pred_peptide_source_id; RefSeq_dna_predicted source ID = $pred_dna_source_id\n";
 
+  my $xrefs =
+    $self->create_xrefs( $peptide_source_id, $dna_source_id,
+      $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id );
 
-  my $xrefs = create_xrefs($peptide_source_id, $dna_source_id, $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id);
   if(!defined($xrefs)){
     return 1; #error
   }
@@ -68,8 +67,10 @@ sub run {
 # Slightly different formats
 
 sub create_xrefs {
+  my $self = shift;
 
-  my ($peptide_source_id, $dna_source_id, $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id) = @_;
+  my ( $peptide_source_id, $dna_source_id, $pred_peptide_source_id,
+      $pred_dna_source_id, $file, $species_id ) = @_;
 
   my %name2species_id =  XrefParser::BaseParser->name2species_id();
 
@@ -78,10 +79,13 @@ sub create_xrefs {
 #  my (%genemap) = %{XrefParser::BaseParser->get_valid_codes("mim_gene",$species_id)};
 #  my (%morbidmap) = %{XrefParser::BaseParser->get_valid_codes("mim_morbid",$species_id)};
 
-  if(!open(REFSEQ, $file)){
+  my $refseq_io = $self->get_filehandle($file);
+
+  if ( !defined $refseq_io ) {
     print "ERROR: Can't open RefSeqGPFF file $file\n";
     return undef;
   }
+
   my @xrefs;
 
   local $/ = "\/\/\n";
@@ -109,7 +113,7 @@ sub create_xrefs {
   }
 
 
-  while (<REFSEQ>) {
+  while ( $_ = $refseq_io->getline() ) {
 
     my $xref;
 
@@ -224,7 +228,7 @@ sub create_xrefs {
 
   } # while <REFSEQ>
 
-  close (REFSEQ);
+  $refseq_io->close();
 
   print "Read " . scalar(@xrefs) ." xrefs from $file\n";
 
@@ -234,14 +238,4 @@ sub create_xrefs {
 
 # --------------------------------------------------------------------------------
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::RefSeqGPFFParser";
-  return $self;
-
-}
-
-# --------------------------------------------------------------------------------
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
index 1557f53fc5..1625701a10 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
@@ -6,10 +6,7 @@ use strict;
 
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -46,7 +43,10 @@ sub run {
     $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
   }
 
-  my $xrefs = create_xrefs($peptide_source_id, $dna_source_id, $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id);
+  my $xrefs =
+    $self->create_xrefs( $peptide_source_id, $dna_source_id,
+      $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id );
+
   if(!defined($xrefs)){
     return 1; #error
   }
@@ -65,21 +65,25 @@ sub run {
 # Slightly different formats
 
 sub create_xrefs {
+  my $self = shift;
 
-  my ($peptide_source_id, $dna_source_id, $pred_peptide_source_id, $pred_dna_source_id, $file, $species_id) = @_;
+  my ( $peptide_source_id, $dna_source_id, $pred_peptide_source_id,
+      $pred_dna_source_id, $file, $species_id ) = @_;
 
   my %name2species_id = XrefParser::BaseParser->name2species_id();
 
-  if(!open(REFSEQ, $file)){
-    print "ERROR: Can't open RefSeq file $file\n";
-    return undef;
+  my $refseq_io = $self->get_filehandle($file);
+
+  if ( !defined $refseq_io ) {
+      print "ERROR: Can't open RefSeq file $file\n";
+      return undef;
   }
+
   my @xrefs;
 
   local $/ = "\n>";
 
-  while (<REFSEQ>) {
-
+  while ( $_ = $refseq_io->getline() ) {
     my $xref;
 
     my $entry = $_;
@@ -92,7 +96,7 @@ sub create_xrefs {
 
     (my $gi, my $n, my $ref, my $acc, my $description) = split(/\|/, $header);
     my ($species, $mrna);
-    if ($file =~ /\.faa$/) {
+    if ($file =~ /\.faa(\.gz|\.Z)?$/) {
 
       ($mrna, $description, $species) = $description =~ /(\S*)\s+(.*)\s+\[(.*)\]$/;
       $xref->{SEQUENCE_TYPE} = 'peptide';
@@ -105,7 +109,7 @@ sub create_xrefs {
         }
       $xref->{SOURCE_ID} = $source_id;
 
-    } elsif ($file =~ /\.fna$/) {
+    } elsif ($file =~ /\.fna(\.gz|\.Z)?$/) {
 
       ($species, $description) = $description =~ /\s*(\w+\s+\w+)\s+(.*)$/;
       $xref->{SEQUENCE_TYPE} = 'dna';
@@ -126,8 +130,10 @@ sub create_xrefs {
     my $species_id_check = $name2species_id{$species};
 
     # skip xrefs for species that aren't in the species table
-    if (defined($species_id) and $species_id == $species_id_check) {
-
+    if (   defined $species_id
+        && defined $species_id_check
+        && $species_id == $species_id_check )
+    {
       my ($acc_no_ver,$ver) = split (/\./,$acc);
       $xref->{ACCESSION} = $acc_no_ver;
       $xref->{VERSION} = $ver;
@@ -144,7 +150,7 @@ sub create_xrefs {
 
   }
 
-  close (REFSEQ);
+  $refseq_io->close();
 
   print "Read " . scalar(@xrefs) ." xrefs from $file\n";
 
@@ -154,14 +160,4 @@ sub create_xrefs {
 
 # --------------------------------------------------------------------------------
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::RefSeqParser";
-  return $self;
-
-}
-
-# --------------------------------------------------------------------------------
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm
index e1dcec0067..96a376aae4 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm
@@ -4,10 +4,7 @@ use strict;
 
 use DBI;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Parse file of Refseq records and assign direct xrefs
 
@@ -15,13 +12,13 @@ sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
 
-  if(!open(REFSEQ,"<".$file)){
+  my $refseq_io = $self->get_filehandle($file);
+
+  if ( !defined $refseq_io ) {    # undef handle means the open failed
     print "Could not open $file\n";
     return 1;
   }
 
-
-
   # becouse the direct mapping have no descriptions etc
   # we have to steal these from the previous Refseq parser.
 
@@ -67,8 +64,10 @@ sub run {
   my $xref_count = 0;
   my %seen;
   my %old_to_new;
-  <REFSEQ>; # header
-  while(<REFSEQ>){
+
+  $refseq_io->getline();    # header
+
+  while ( $_ = $refseq_io->getline() ) {
       chomp;
       my ($ccds,$refseq) = split;
     
@@ -102,19 +101,12 @@ sub run {
       }   
   }
 
-  print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $xref_count direct_xrefs  from $line_count lines.\n";
-
-  close(REFSEQ);
-  return 0;
-
-}
+  $refseq_io->close();
 
+  print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $xref_count direct_xrefs  from $line_count lines.\n";
 
-sub new {
 
-  my $self = {};
-  bless $self, "XrefParser::RefSeq_CCDSParser";
-  return $self;
+  return 0;
 
 }
 
diff --git a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
index d6beb15afd..8d1f89f604 100644
--- a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
@@ -4,11 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -40,15 +36,17 @@ sub run {
   
   
 
-  if(!open(SGD,"<".$file)){
-    print  "ERROR: Could not open $file\n";
-    return 1; # 1 is an error
+  my $sgd_io = $self->get_filehandle($file);
+
+  if ( !defined $sgd_io ) {
+    print "ERROR: Could not open $file\n";
+    return 1;    # 1 is an error
   }
 
   my $xref_count =0;
   my $syn_count =0;
 
-  while (<SGD>) {
+  while ( $_ = $sgd_io->getline() ) {
     chomp;
     my ($locus_name, $alias_name, $desc, $gene_prod, $phenotype, $orf_name, $sgd_id) = split(/\t/,$_);
 
@@ -60,18 +58,11 @@ sub run {
       $syn_count++;
     }
   }
+
+  $sgd_io->close();
+
   print $xref_count." SGD Xrefs added with $syn_count synonyms\n";
   return 0; #successful
 }
 
-
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::SGDParser";
-  return $self;
-
-}
- 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/SegmentParser.pm b/misc-scripts/xref_mapping/XrefParser/SegmentParser.pm
index 8d5605cb3b..8ff59a44db 100644
--- a/misc-scripts/xref_mapping/XrefParser/SegmentParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/SegmentParser.pm
@@ -4,10 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
  
-use XrefParser::BaseParser;
- 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -41,11 +38,14 @@ sub run {
   my %name_2_source_id=();
   my $added=0;
 
-  if(!open(FILE,"<". $file)){
-    print  "ERROR: Could not open file $file\n";
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open file $file\n";
     return 1;
   }
-  while(my $line = <FILE>){
+
+  while ( my $line = $file_io->getline() ) {
     chomp $line;
     my ($gene_id,$transcript_id,$source_name,$acc,$display_label,$description, $status)
       = split("\t",$line);
@@ -68,19 +68,11 @@ sub run {
     #the those mapped to the transcript to the genes anyway due to the
     #biomart check
   }
-  close FILE;
+
+  $file_io->close();
 
   print "Added $added Xrefs for Gene segments\n";
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::SegmentParser";
-  return $self;
-
-}
-
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/UniGeneParser.pm b/misc-scripts/xref_mapping/XrefParser/UniGeneParser.pm
index 1d6f7e1faf..fd0680d935 100644
--- a/misc-scripts/xref_mapping/XrefParser/UniGeneParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/UniGeneParser.pm
@@ -6,10 +6,7 @@ use strict;
 
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -41,7 +38,11 @@ sub run {
   if(!defined($species_id)){
     $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
   }
-my $xrefs =create_xrefs($unigene_source_id, $unigene_source_id, $file, $species_id);
+
+  my $xrefs =
+    $self->create_xrefs( $unigene_source_id, $unigene_source_id, $file,
+      $species_id );
+
   if(!defined($xrefs)){
     return 1; #error
   }
@@ -56,23 +57,24 @@ my $xrefs =create_xrefs($unigene_source_id, $unigene_source_id, $file, $species_
 my %geneid_2_desc;
 
 sub get_desc{
+  my $self = shift;
   my $file = shift;
+
   my $dir = dirname($file);
- 
 
   (my $name) = $file  =~ /\/(\w+)\.seq\.uniq/;
   print $name."\n";
 
   local $/ = "//";
 
+  my $desc_io = $self->get_filehandle( $dir . '/' . $name . '.data' );
 
-  if(!open (DESC, "$dir/$name.data")){
+  if ( !defined $desc_io ) {
     print "ERROR: Can't open $dir/$name.data\n";
     return undef;
   }
 
-  while(<DESC>){
-    
+  while ( $_ = $desc_io->getline() ) {
     #ID          Hs.159356
     #TITLE       Hypothetical LOC388277
     
@@ -82,21 +84,27 @@ sub get_desc{
     $geneid_2_desc{$id} = $descrip;
     
   }
+
+  $desc_io->close();
+
   return 1;
 }
 
 
 sub create_xrefs {
+  my $self = shift;
 
-  my ($peptide_source_id, $unigene_source_id, $file, $species_id) = @_;
+  my ( $peptide_source_id, $unigene_source_id, $file, $species_id ) = @_;
 
   my %name2species_id = XrefParser::BaseParser->name2species_id();
 
-  if(!defined(get_desc($file))){
+  if ( !defined( $self->get_desc($file) ) ) {
     return undef;
   }
 
-  if(!open(UNIGENE, $file)){
+  my $unigene_io = $self->get_filehandle($file);
+
+  if ( !defined $unigene_io ) {
     print "Can't open RefSeq file $file\n";
     return undef;
   }
@@ -110,7 +118,7 @@ sub create_xrefs {
 
   local $/ = "\n>";
 
-  while (<UNIGENE>) {
+  while ( $_ = $unigene_io->getline() ) {
 
     my $xref;
 
@@ -150,7 +158,8 @@ sub create_xrefs {
     
   }
 
-  close (UNIGENE);
+  $unigene_io->close();
+
   %geneid_2_desc=();
   print "Read " . scalar(@xrefs) ." xrefs from $file\n";
 
@@ -158,16 +167,4 @@ sub create_xrefs {
 
 }
 
-# --------------------------------------------------------------------------------
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::UniGeneParser";
-  return $self;
-
-}
-
-# --------------------------------------------------------------------------------
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
index 97c5dcdf03..e15ad5a66a 100644
--- a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
@@ -13,10 +13,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -54,7 +51,10 @@ sub run {
   print "SpTREMBL source id for $file: $sptr_source_id\n";
  
 
-  my @xrefs = create_xrefs($sp_source_id, $sptr_source_id, $species_id, $file);
+  my @xrefs =
+    $self->create_xrefs( $sp_source_id, $sptr_source_id, $species_id,
+      $file );
+
   if ( !@xrefs ) {
       return 1;    # 1 error
   }
@@ -110,8 +110,9 @@ sub get_species {
 # Parse file into array of xref objects
 
 sub create_xrefs {
+  my $self = shift;
 
-  my ($sp_source_id, $sptr_source_id, $species_id, $file) = @_;
+  my ( $sp_source_id, $sptr_source_id, $species_id, $file ) = @_;
 
   my $num_sp = 0;
   my $num_sptr = 0;
@@ -135,15 +136,14 @@ sub create_xrefs {
   my (%genemap) = %{XrefParser::BaseParser->get_valid_codes("mim_gene",$species_id)};
   my (%morbidmap) = %{XrefParser::BaseParser->get_valid_codes("mim_morbid",$species_id)};
 
-  if(!open(UNIPROT, $file)){
-    print"Can't open Swissprot file $file\n";
-    return undef;
-  }
+  my $uniprot_io = $self->get_filehandle($file);
+  if ( !defined $uniprot_io ) { print "Can't open Swissprot file $file\n"; return; }    # bare return: run() tests !@xrefs
+
   my @xrefs;
 
-  local $/ = "\/\/\n";
+  local $/ = "//\n";
 
-  while (<UNIPROT>) {
+  while ( $_ = $uniprot_io->getline() ) {
 
     # if an OX line exists, only store the xref if the taxonomy ID that the OX
     # line refers to is in the species table
@@ -353,7 +353,7 @@ sub create_xrefs {
 
   }
 
-  close (UNIPROT);
+  $uniprot_io->close();
 
   print "Read $num_sp SwissProt xrefs and $num_sptr SPTrEMBL xrefs from $file\n";
   print "Found $num_sp_pred predicted SwissProt xrefs and $num_sptr_pred predicted SPTrEMBL xrefs\n" if ($num_sp_pred > 0 || $num_sptr_pred > 0);
@@ -363,16 +363,4 @@ sub create_xrefs {
   #TODO - currently include records from other species - filter on OX line??
 }
 
-# --------------------------------------------------------------------------------
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::UniProtParser";
-  return $self;
-
-}
-
-# --------------------------------------------------------------------------------
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtVarSplicParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtVarSplicParser.pm
index 0486f04a67..9001395c49 100644
--- a/misc-scripts/xref_mapping/XrefParser/UniProtVarSplicParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/UniProtVarSplicParser.pm
@@ -5,10 +5,7 @@ package XrefParser::UniProtVarSplicParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # UniProtVarSplic file format: fasta, e.g.
 
@@ -27,17 +24,18 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
-    print  "ERROR: Could not open $file\n";
-    return 1; # 1 error
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open $file\n";
+    return 1;    # 1 error
   }
 
   my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
   my (%swiss)  =  %{XrefParser::BaseParser->get_valid_codes("uniprot",$species_id)};
  
   my $missed = 0;
-  while (<FILE>) {
-
+  while ( $_ = $file_io->getline() ) {
     my $xref;
 
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
@@ -69,7 +67,7 @@ sub run {
     }
   }
 
-  close (FILE);
+  $file_io->close();
 
   print $missed." ignored as original uniprot not found in database\n";
   print scalar(@xrefs) . " UniProtVarSplic xrefs succesfully parsed\n";
@@ -80,15 +78,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::UniProtVarSplicParser";
-  return $self;
-
-}
-
 1;
-
-
diff --git a/misc-scripts/xref_mapping/XrefParser/WilsonAffyParser.pm b/misc-scripts/xref_mapping/XrefParser/WilsonAffyParser.pm
index fcd0eda3da..df059b421c 100644
--- a/misc-scripts/xref_mapping/XrefParser/WilsonAffyParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WilsonAffyParser.pm
@@ -2,16 +2,12 @@ package XrefParser::WilsonAffyParser;
 
 use strict;
 
-use XrefParser::BaseParser;
+use base qw( XrefParser::BaseParser );
 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
 my $xref_sth ;
 my $dep_sth;
 my $syn_sth;
 
-
-
 sub run {
 
   my ($self, $file, $source_id, $species_id) = @_;
@@ -39,14 +35,16 @@ sub create_xrefs {
 
   my @xrefs;
 
-  if(!open(FILE,"<".$file)){
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
     print "ERROR: Could not open $file\n";
-    return 1; # 1 error
+    return 1;    # 1 error
   }
-  <FILE>; # skip first line
 
-  while (<FILE>) {
+  $file_io->getline();    # skip first line
 
+  while ( $_ = $file_io->getline() ) {
     #last if ($count > 200);
     my $xref;
 
@@ -79,10 +77,13 @@ sub create_xrefs {
 
       # fetch sequence for others (EMBL ESTs and RefSeqs - pfetch will handle these)
       system ("pfetch -q $target > seq.txt");
-      open(SEQ, "<seq.txt");
-      my $seq = <SEQ>;
+
+      # The handle may be undef if seq.txt could not be opened; guard before use.
+      my $seq_io = $self->get_filehandle('seq.txt');
+      my $seq    = defined $seq_io ? $seq_io->getline() : undef;
+      $seq_io->close() if defined $seq_io;
+
       chomp($seq);
-      close(SEQ);
 
       if ($seq && $seq !~ /no match/) {
 
@@ -116,7 +117,7 @@ sub create_xrefs {
 
   }
 
-  close(FILE);
+  $file_io->close();
 
   print "\n\nParsed $count primary xrefs.\n";
   print "Couldn't get sequence for $noseq primary_xrefs\n" if ($noseq);
@@ -126,12 +127,4 @@ sub create_xrefs {
 
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::WilsonAffyParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm b/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
index c31253be7d..8f27dbfb86 100644
--- a/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
@@ -3,10 +3,8 @@ package XrefParser::WormPepParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
+use base qw( XrefParser::BaseParser );
 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
 my $xref_sth ;
 my $dep_sth;
 
@@ -31,15 +29,17 @@ sub run {
   my $xref_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$worm_source_id AND species_id=$species_id");
   my $xref_sth2 = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$worm_locus_id AND species_id=$species_id");
 
-  if(!open(PEP,"<".$file)){
+  my $pep_io = $self->get_filehandle($file);
+
+  if ( !defined $pep_io ) {
     print "ERROR: Could not open $file\n";
-    return 1; # 1 error
+    return 1;    # 1 error
   }
-  my ($x_count, $d_count);
 
+  my ($x_count, $d_count);
 
-  while (<PEP>) {
 
+  while ( $_ = $pep_io->getline() ) {
     my ($transcript, $wb, $display)  = (split(/\t/,substr($_,1)))[0,1,2];
 
     # reuse or create xref
@@ -69,19 +69,10 @@ sub run {
     $d_count++;
   }
 
-  close (PEP);
+  $pep_io->close();
 
   print "Added $d_count direct xrefs and $x_count xrefs\n";
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::WormPepParser";
-  return $self;
-
-}
-
 1;
-
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseDatabaseStableIDParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseDatabaseStableIDParser.pm
index c18fa62366..92d43cabed 100644
--- a/misc-scripts/xref_mapping/XrefParser/WormbaseDatabaseStableIDParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseDatabaseStableIDParser.pm
@@ -7,10 +7,7 @@ package XrefParser::WormbaseDatabaseStableIDParser;
 
 use strict;
 
-use XrefParser::DatabaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::DatabaseParser);
+use base qw( XrefParser::DatabaseParser );
 
 sub run {
 
@@ -46,13 +43,5 @@ sub run {
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::WormbaseDatabaseStableIDParser";
-  return $self;
-
-}
-
 1;
 
diff --git a/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm b/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm
index 452855aea8..02c1e7591f 100644
--- a/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm
@@ -5,10 +5,7 @@ package XrefParser::XenopusJamboreeParser;
 use strict;
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # Xenopus Jamboree peptides file format: fasta, e.g.
 
@@ -24,14 +21,16 @@ sub run {
 
   local $/ = "\n>";
 
-  if(!open(FILE,"<".$file)){
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
     print "ERROR: Could not open $file\n";
-    return 1;  # 1 error
+    return 1;    # 1 error
   }
-  my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
 
-  while (<FILE>) {
+  my $species_tax_id = $self->get_taxonomy_from_species_id($species_id);
 
+  while ( $_ = $file_io->getline() ) {
     my $xref;
 
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
@@ -57,7 +56,7 @@ sub run {
 
   }
 
-  close (FILE);
+  $file_io->close();
 
   print scalar(@xrefs) . " XenopusJamboreeParser xrefs succesfully parsed\n";
 
@@ -69,13 +68,4 @@ sub run {
   return 0;
 }
 
-
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::XenopusJamboreeParser";
-  return $self;
-
-}
-
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm b/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm
index 4875998fe4..83ceda1bcf 100644
--- a/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm
@@ -4,12 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
 
-use XrefParser::BaseParser;
-
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
-
-
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -45,10 +40,13 @@ sub run {
   my (%swiss) = %{XrefParser::BaseParser->get_valid_codes("uniprot",$species_id)};
   my (%refseq) = %{XrefParser::BaseParser->get_valid_codes("refseq",$species_id)};
 
-  if(!open(SWISSPROT,"<".$dir."/swissprot.txt")){
-    print  "ERROR: Could not open $dir/swissprot.txt\n";
-    return 1; # 1 error
+  my $swissprot_io = $self->get_filehandle( $dir . '/swissprot.txt' );
+
+  if ( !defined $swissprot_io ) {
+    print "ERROR: Could not open $dir/swissprot.txt\n";
+    return 1;    # 1 error
   }
+
 #e.g.
 #ZDB-GENE-000112-30      couptf2 O42532
 #ZDB-GENE-000112-32      couptf3 O42533
@@ -58,7 +56,8 @@ sub run {
   my $spcount =0;
   my $rscount =0;
   my $mismatch=0;
-  while (<SWISSPROT>) {
+
+  while ( $_ = $swissprot_io->getline() ) {
     chomp;
     my ($zfin, $label, $acc) = split (/\s+/,$_);
     if(defined($swiss{$acc})){
@@ -69,16 +68,21 @@ sub run {
       $mismatch++;
     }
   }
-  close SWISSPROT;
-  
-  if(!open(REFSEQ,"<".$dir."/refseq.txt")){
-    print  "ERROR: Could not open $dir/refseq.txt\n";
+
+  $swissprot_io->close();
+
+  my $refseq_io = $self->get_filehandle( $dir . '/refseq.txt' );
+
+  if ( !defined $refseq_io ) {
+    print "ERROR: Could not open $dir/refseq.txt\n";
     return 1;
   }
+
 #ZDB-GENE-000125-12      igfbp2  NM_131458
 #ZDB-GENE-000125-12      igfbp2  NP_571533
 #ZDB-GENE-000125-4       dlc     NP_571019
-  while (<REFSEQ>) {
+
+  while ( $_ = $refseq_io->getline() ) {
     chomp;
     my ($zfin, $label, $acc) = split (/\s+/,$_);
     if(defined($refseq{$acc})){
@@ -89,18 +93,12 @@ sub run {
       $mismatch++;
     }
   }
-  close REFSEQ;
+
+  $refseq_io->close();
+
   print "\t$spcount xrefs from Swissprot and $rscount xrefs from RefSeq succesfully loaded\n";
   print "\t$mismatch xrefs ignored\n";
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::ZFINParser";
-  return $self;
-
-}
- 
 1;
diff --git a/misc-scripts/xref_mapping/XrefParser/ncRNAParser.pm b/misc-scripts/xref_mapping/XrefParser/ncRNAParser.pm
index 92710ba9cc..8ca57774fb 100644
--- a/misc-scripts/xref_mapping/XrefParser/ncRNAParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/ncRNAParser.pm
@@ -4,10 +4,7 @@ use strict;
 use POSIX qw(strftime);
 use File::Basename;
  
-use XrefParser::BaseParser;
- 
-use vars qw(@ISA);
-@ISA = qw(XrefParser::BaseParser);
+use base qw( XrefParser::BaseParser );
 
 # --------------------------------------------------------------------------------
 # Parse command line and run if being run directly
@@ -41,11 +38,14 @@ sub run {
   my %name_2_source_id=();
   my $added=0;
 
-  if(!open(FILE,"<". $file)){
-    print  "ERROR: Could not open file $file\n";
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "ERROR: Could not open file $file\n";
     return 1;
   }
-  while(my $line = <FILE>){
+
+  while ( my $line = $file_io->getline() ) {
     chomp $line;
     my ($gene_id,$transcript_id,$source_name,$acc,$display_label,$full_description, $status)
       = split("\t",$line);
@@ -79,19 +79,11 @@ sub run {
     #biomart check
     #    $self->add_direct_xref($xref_id, $gene_id, "Gene", "")             if (defined($gene_id)); 
   }
-  close FILE;
+
+  $file_io->close();
 
   print "Added $added Xrefs for ncRNAs\n";
   return 0;
 }
 
-sub new {
-
-  my $self = {};
-  bless $self, "XrefParser::ncRNAParser";
-  return $self;
-
-}
-
-
 1;
diff --git a/misc-scripts/xref_mapping/xref_parser.pl b/misc-scripts/xref_mapping/xref_parser.pl
index 5d401928e1..2fe667106f 100644
--- a/misc-scripts/xref_mapping/xref_parser.pl
+++ b/misc-scripts/xref_mapping/xref_parser.pl
@@ -3,36 +3,45 @@ use strict;
 use Getopt::Long;
 use XrefParser::BaseParser;
 
-my ($host, $port, $dbname, $user, $pass, @species, @sources, $skipdownload, $checkdownload, $create, $release, $cleanup, $drop_existing_db, $deletedownloaded, $dl_path, @notsource);
-
-GetOptions('dbuser|user=s'       => \$user,
-	   'dbpass|pass=s'       => \$pass,
-	   'dbhost|host=s'       => \$host,
-	   'dbport|port=i'       => \$port,
-	   'dbname=s'            => \$dbname,
-	   'species=s'           => \@species,
-	   'source=s'            => \@sources,
-	   'download_dir=s'      => \$dl_path,
-	   'skipdownload'        => \$skipdownload,     # skips all downloads
-	   'checkdownload!'      => \$checkdownload,   # if file exists it won't be downloaded 
-	   'create'       => \$create,
-	   'setrelease=s' => \$release,
-	   'cleanup'      => \$cleanup,
-	   'notsource=s'    => \@notsource,
-	   'drop_db|dropdb!'     => \$drop_existing_db, # drops xref db without user interaction
-	   'delete_downloaded' => \$deletedownloaded,
-	   'download_path=s' => \$dl_path,
-	   'help'         => sub { usage(); exit(0); });
+my (
+    $host,             $port,             $dbname,
+    $user,             $pass,             @species,
+    @sources,          $skipdownload,     $checkdownload,
+    $create,           $release,          $cleanup,
+    $drop_existing_db, $deletedownloaded, $dl_path,
+    @notsource, $compressed
+);
+
+GetOptions(
+    'dbuser|user=s'  => \$user,
+    'dbpass|pass=s'  => \$pass,
+    'dbhost|host=s'  => \$host,
+    'dbport|port=i'  => \$port,
+    'dbname=s'       => \$dbname,
+    'species=s'      => \@species,
+    'source=s'       => \@sources,
+    'download_dir=s' => \$dl_path,
+    'skipdownload'   => \$skipdownload,     # skips all downloads
+    'checkdownload!' => \$checkdownload,    # don't download if exists
+    'create'         => \$create,
+    'setrelease=s'   => \$release,
+    'cleanup'        => \$cleanup,
+    'notsource=s'    => \@notsource,
+    'drop_db|dropdb!' =>
+      \$drop_existing_db,    # drops xref db without user interaction
+    'delete_downloaded' => \$deletedownloaded,
+    'download_path=s'   => \$dl_path,
+    'compressed' => \$compressed,   # don't force decompression of files
+    'help' => sub { usage(); exit(0); }
+);
 
 @species = split(/,/,join(',',@species));
 @sources  = split(/,/,join(',',@sources));
 
 
-if (!$user || !$host || !$dbname) {
-
-  usage();
-  exit(1);
-
+if ( !$user || !$host || !$dbname ) {
+    usage();
+    exit(1);
 }
 
 XrefParser::BaseParser::run(
@@ -43,7 +52,8 @@ XrefParser::BaseParser::run(
     $checkdownload,    $create,
     $release,          $cleanup,
     $drop_existing_db, $deletedownloaded,
-    $dl_path,          \@notsource
+    $dl_path,          \@notsource,
+    $compressed
 );
 
 # --------------------------------------------------------------------------------
-- 
GitLab