=head1 LICENSE

  Copyright (c) 1999-2013 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBFile::FileAdaptor - Base Adaptor for direct file access

=head1 DESCRIPTION

Basic wrapper class to provide access to file based data.

This is primarily aimed at indexed Collection (.col) files which are optimised for Slice
based queries. Collections store fixed width/windowed data as BLOBs.  This makes
it possible to seek to the required location given slice coordinates and read only
the required amount of data covering the slice.

Currently only works as a hybrid DBAdaptor e.g. ResultFeatureAdaptor which inherits both from
here and BaseFeatureAdaptor.

=cut



package Bio::EnsEMBL::DBFile::FileAdaptor;

use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
use strict;
use warnings;


=head2 get_filehandle

  Arg[1]     : string     - filepath
  Arg[2]     : HASHREF    - Optional params, see open_file
  Example    : my $fh     = $self->get_filehandle($filepath, {-binmode => 1});
  Description: Gets and caches a simple file handle.
  Returntype : GLOB/undef - filehandle
  Exceptions : warns if cache entry exists but is not defined 
  Caller     : general
  Status     : at risk

=cut

sub get_filehandle{
  my ($self, $filepath, $params_hash) = @_;

  # Returns the cached filehandle for $filepath, opening and caching it on
  # the first request. Returns undef when no defined handle is cached.

  # Work on a shallow copy so the caller's params hash is never modified.
  # A missing or false -file_operator defaults to '<', mirroring open_file,
  # so both methods always agree on the mode actually used.
  my %params  = %{ $params_hash || {} };
  my $file_op = $params{-file_operator} || '<';
  $params{-file_operator} = $file_op;

  if(! exists $self->{file_cache}{$filepath}{filehandle}){
    # Fully qualified method call: always dispatch to this package's
    # open_file, never to a same-named sub elsewhere in the inheritance tree
    my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, \%params);

    if(defined $fh){
      $self->{file_cache}{$filepath}{filehandle} = $fh;
      # Only handles opened for reading are passed to the initialise hook
      $self->initialise_filehandle($filepath) if($file_op eq '<');
    }
  }
  elsif(! defined $self->{file_cache}{$filepath}{filehandle}){
    # A cache entry exists but holds undef.
    # This may be one of several read/seek errors which will have already been warned
    warn "Encountered an error with file handle for $filepath\n";
  }
  # TODO: else check against the cached file operator, to make sure we are
  # not trying to re-open an already open filehandle with a different operator

  return $self->{file_cache}{$filepath}{filehandle};
}


=head2 open_file

  Arg[1]     : string     - filepath
  Arg[2]     : HASHREF    - Optional params:
                          -binmode       => 0|1,   # Boolean i.e. treat file as binary
                          -file_operator => '>'    # Default is '<'
                         #-perms_octal   =>  # Requires FileHandle
  Example    : my $fh     = $self->open_file($filepath, {-binmode => 1, -file_operator => '>'});
  Description: Opens a file for reading or writing.
  Returntype : GLOB/undef - filehandle
  Exceptions : throws if file open fails
               throws if file operator unsupported
               throws if failed to set binmode
  Caller     : general
  Status     : at risk

=cut

sub open_file{
  my ($self, $filepath, $params_hash) = @_;

  # Opens $filepath with the requested operator and returns the lexical
  # filehandle. Every failure is reported via throw(); callers that only
  # want to test for a file should wrap the call in eval.

  #Validate params_hash?
  #rearrange? Will not warn/throw for invalid keys?
  #perms octal, requires FileHandle? See EFGUtils::open_file

  # Three-arg open with an undefined filename silently opens an anonymous
  # temporary file, so reject an undefined path explicitly.
  if(! defined $filepath){
    throw('Must provide a defined filepath to open');
  }

  # A missing or false operator means open for reading
  my $file_op = $params_hash->{-file_operator} || '<';

  # Only plain read, write and append are supported
  if(($file_op ne '<') &&
     ($file_op ne '>') &&
     ($file_op ne '>>')){
    throw("Cannot perform open with unsupported operator:\t${file_op}${filepath}");
  }

  # $fh will still be a GLOB on failure, so test the return value of open
  my $success = open(my $fh, $file_op, $filepath);

  if(! $success){
    throw("Failed to open:\t$filepath\n$!\n");
  }
  elsif($params_hash->{-binmode}){
    # Treat the file as binary when requested
    $success = binmode($fh);

    if(! $success){
      throw("Failed to set binmode:\t$filepath\n$!");
    }
  }

  return $fh;
}


=head2 validate_file_length

  Arg[1]     : string  - filepath
  Arg[2]     : int     - expected length in bytes
  Arg[3]     : boolean - Optional, open the file in binmode
  Example    : $self->validate_file_length($filepath, $expected_length);
  Description: Utility method which can be used during file creation
  Returntype : None
  Exceptions : throws if file open fails
               throws if file is not expected length
  Caller     : general
  Status     : at risk - change to seek to account for 'logical characters'

=cut

sub validate_file_length{
  my ($self, $filepath, $expected_length, $binmode) = @_;

  # Checks that the file at $filepath is exactly $expected_length bytes
  # long, throwing if it is shorter or longer. Returns nothing on success.

  #Currently not using cache as we rarely want to
  #use the file handle afterwards

  #THIS WAS USING EFGUtils::open_file imported in the Collector::ResultFeature!!!!
  #which is just a sub not a class method, and is in a parallel inheritance path
  #No warnings about redefining method :(
  #Force use of FileAdaptor::open_file

  my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, {-binmode => $binmode});

  #sysseek always returns length in bytes, change to seek which
  #uses logical characters i.e. actual encoding?
  #Does seek use bytes in binmode and chars in non-binmode?

  my $seeked_bytes = sysseek($fh, 0, 2);# 2 is SEEK_END
  #There is no systell function. Use sysseek(FH, 0, 1) for that.

  # sysseek returns undef on failure; report that rather than comparing
  # an undefined value against the expected length
  if(! defined $seeked_bytes){
    throw("Failed to seek to end of file:\t$filepath\n$!\n");
  }

  # sysseek returns the string '0 but true' for position 0 (empty file);
  # force it to a plain number so the error messages below read correctly
  $seeked_bytes += 0;

  if($seeked_bytes < $expected_length){
    throw("File is shorter($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }
  elsif($seeked_bytes > $expected_length){
    throw("File is longer($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }

  return;
}





### STUB/TEMPLATE METHODS ###
#
#   If required these should be over-ridden in the
#   descendant FileAdaptor e.g. CollectionAdaptor
#   Listed here for visibility (rather than
#   using 'can')


sub initialise_filehandle {
  # Default no-op hook: accepts the invocant and a filepath, does nothing
  # with them and returns nothing (undef in scalar context, an empty list
  # in list context). Descendant adaptors override this as required.
  my ($self, $filepath) = @_;

  return;
}



1;