From 6eec47a7ae80b26e08d4f14d27e16f9b94e9e5c7 Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Fri, 23 Jun 2006 07:47:13 +0000
Subject: [PATCH] Support .zip files, and # notation for specifying which file
 to use from the zip file.

---
 .../xref_mapping/XrefParser/BaseParser.pm         | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
index 288e62595f..258cf9b2fd 100644
--- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
@@ -227,8 +227,21 @@ sub run {
 
       }
       else{
-	if ($file =~ /(.*)\.gz$/ or $file =~ /(.*)\.Z$/ or $file =~ /(.*)\.zip$/) {
+	if ($file =~ /(.*)\.gz$/ or $file =~ /(.*)\.Z$/) {
 	  $file = $1;
+	} elsif ($file =~ /\.zip/) {
+	  # A .zip file is an archive that can contain several files. The file
+	  # to be extracted is named after a '#' character appended to the
+	  # archive's URL in populate_metadata.sql, e.g.
+	  # http://www.illumina.com/General/Products/ArraysReagents/zip_files/Human_WG-6_rev.zip#Human_WG-6_rev.csv
+	  # TODO - maybe add support for .tar.gz here as well
+	  if ($file =~ /(.*\.zip)\#(.*)$/) {
+	    my $archive = $1;
+	    $file = $2;
+	    print "Using $file from archive $archive\n";
+	  } else {
+	    die "$file specifies a .zip file withouut using the # notation to specify the file in the archive to be used.";
+	  }
 	}
       }
 
-- 
GitLab