From 4918ced348318b285ce6b222242b59703b5d8486 Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Fri, 24 Feb 2012 09:54:33 +0000
Subject: [PATCH] Letting datafiles support more than just one file extension

---
 modules/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm | 29 +++++++---
 modules/Bio/EnsEMBL/DataFile.pm              | 57 +++++++++++++++-----
 2 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm
index 626ff6da45..ac3dbfc98d 100644
--- a/modules/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm
@@ -54,7 +54,7 @@ use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/;
 
 use Bio::EnsEMBL::DataFile;
 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
-use Bio::EnsEMBL::Utils::Exception qw/throw warning/;
+use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/;
 use Bio::EnsEMBL::Utils::Scalar qw/:assert/;
 
 my $GLOBAL_BASE_PATH;
@@ -106,22 +106,37 @@ sub get_base_path {
   Description : Returns an expected extension for the given DataFile type
   Returntype  : Scalar of the expected file extension
   Exceptions  : Raised if the given file type is not understood
+  Status      : Deprecated
 
 =cut
 
 sub DataFile_to_extension {
+  my ($self, $df) = @_;
+  deprecate("Use DataFile_to_extensions() instead"); # flag this method as deprecated in favour of the plural form
+  my $extensions = $self->DataFile_to_extensions($df); # ArrayRef of extensions for this DataFile's type
+  return $extensions->[0]; # only the first listed extension, so callers still receive a single scalar
+}
+
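+=head2 DataFile_to_extensions
+
+  Description : Returns an ArrayRef of every expected file extension for the given DataFile type; throws if the type is not understood
+
+=cut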
+
+sub DataFile_to_extensions { # maps a DataFile's file_type() to an ArrayRef of file extensions
   my ($self, $df) = @_;
   my $type = $df->file_type();
-  my $ext = {
-    BAM     => 'bam',
+  my $extensions = { # anonymous lookup keyed by file type; each value lists every extension for that type
+    BAM     => ['bam', 'bam.bai'], # NOTE(review): 'bam.bai' is presumably the companion index file - confirm
 #    BIGBED  => 'bb',
-    BIGWIG  => 'bw',
-    VCF     => 'vcf',
+    BIGWIG  => ['bw'], # single extension, still wrapped in an ArrayRef for a uniform return shape
+    VCF     => ['vcf.gz', 'vcf.gz.tbi'], # NOTE(review): 'vcf.gz.tbi' is presumably the companion index file - confirm
   }->{$type}; 
-  throw sprintf(q{No extension found for the type '%s'}, $type ) if ! $ext;
-  return $ext;
+  throw sprintf(q{No extensions found for the type '%s'}, $type ) if ! $extensions; # types without an entry above yield undef and raise here
+  return $extensions; # ArrayRef; DataFile_to_extension() hands back only element 0 of it
 }
 
+
 =head2 DataFile_to_adaptor
 
   Arg[1]     	: Bio::EnsEMBL::DataFile
diff --git a/modules/Bio/EnsEMBL/DataFile.pm b/modules/Bio/EnsEMBL/DataFile.pm
index 230277f57e..7a0d0407c7 100644
--- a/modules/Bio/EnsEMBL/DataFile.pm
+++ b/modules/Bio/EnsEMBL/DataFile.pm
@@ -108,8 +108,16 @@ sub get_ExternalAdaptor {
 
 sub path {
   my ($self, $base) = @_;
+  my $all_paths = $self->get_all_paths($base); # ArrayRef of every path built for this DataFile
+  return $all_paths->[0]; # path() keeps its scalar return by exposing only the first entry
+}
+
+sub get_all_paths {
+  my ($self, $base) = @_;
+    
+  return [$self->url()] if $self->absolute();
   
-  return $self->url() if $self->absolute();
+  my @all_paths;
   
   $base = $self->adaptor()->get_base_path($base) if ! $base;
 
@@ -130,26 +138,33 @@ sub path {
   push(@portions, software_version()) if $self->version_lock();
   push(@portions, $self->adaptor()->db()->group());
   
+  #Targets are the files to generate
+  my @targets;
   #If URL is populated we assume we need to add this onto the end but removing the /
   if($self->url()) {
     my @split = split(/\//, $self->url());
-    push(@portions, @split);
+    push(@targets, [@split]);
   }
   else {
-    my $ext = $self->adaptor()->DataFile_to_extension($self);
-    my $filename = sprintf(q{%s.%s}, $self->name(), $ext);
-    push(@portions, $filename);
+    my $extensions = $self->adaptor()->DataFile_to_extensions($self);
+    foreach my $ext (@{$extensions}) {
+      my $filename = sprintf(q{%s.%s}, $self->name(), $ext);
+      push(@targets, [$filename]);
+    }
   }
   
   my $is_uri = is_uri($base);
-  my $path;
-  if($is_uri) {
-    $path = join(q{/}, $base, @portions);
+  foreach my $t (@targets) {
+    my $path;
+    if($is_uri) {
+      $path = join(q{/}, $base, @portions, @{$t});
+    }
+    else {
+      $path = File::Spec->catfile($base, @portions, @{$t});
+    }
+    push(@all_paths, $path);
   }
-  else {
-    $path = File::Spec->catfile($base, @portions);
-  }
-  return $path;
+  return \@all_paths;
 }
 
 =head2 coord_system
@@ -273,4 +288,22 @@ sub file_type {
   return $self->{'file_type'};
 }
 
+#=head2 files
+#
+#  Args       	: 
+#  Example			: my @files = @{$df->files()};
+#  Description	: Returns all the file names we expect to cover for a flat file
+#  Returntype 	: type return_description
+#  Exceptions 	: 
+#  Caller     	: caller
+#  Status     	: status
+#
+#=cut
+#
+#
+#sub files {
+#  my ($self) = @_;
+#  
+#}
+
 1;
\ No newline at end of file
-- 
GitLab