From 96f7b6c6d9259772da37286f31d90e8fa6a5cd7d Mon Sep 17 00:00:00 2001
From: Kieron Taylor <ktaylor@ebi.ac.uk>
Date: Fri, 28 Oct 2011 15:28:32 +0000
Subject: [PATCH] Introduced Utils::IO namespace along with serializer code for
 GFF3 and human-readable formats. Pre-release, not intended for public use.
 Also added Utils::BiotypeMapper that performs ontology mappings.

---
 modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm    | 221 ++++++++++++++++++
 modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm | 214 +++++++++++++++++
 .../Bio/EnsEMBL/Utils/IO/ReportSerializer.pm  | 209 +++++++++++++++++
 modules/Bio/EnsEMBL/Utils/IO/Serializer.pm    | 160 +++++++++++++
 4 files changed, 804 insertions(+)
 create mode 100644 modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/IO/ReportSerializer.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/IO/Serializer.pm

diff --git a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
new file mode 100644
index 0000000000..d5148f0053
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
@@ -0,0 +1,221 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 NAME
+
+BiotypeMapper - Translates EnsEMBL biotypes into Sequence Ontology terms and back
+
+=head1 AUTHOR
+
+Kieron Taylor, 2011 - ktaylor@ebi.ac.uk
+
+=head1 SYNOPSIS
+
+use Bio::EnsEMBL::Utils::BiotypeMapper;
+
+my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' );
+my $biotype_mapper = Bio::EnsEMBL::Utils::BiotypeMapper->new($ontology_adaptor);
+
+print $biotype_mapper->translate_feature_to_SO_term($feature);
+
+=head1 DESCRIPTION
+
+BiotypeMapper provides a series of nearest matches between EnsEMBL biotypes and
+the Sequence Ontology (http://www.sequenceontology.org)
+
+Mappings are imperfect due to the inexact correspondence of biotypes to
+several SO terms. A best guess has been chosen in each case.
+
+Reverse mappings from SO to biotype are vague, due to many-to-one relationships.
+In this case a list of possible terms is given. 
+
+=cut
+
+package Bio::EnsEMBL::Utils::BiotypeMapper;
+
+use strict;
+use warnings;
+use Carp;
+
+my %gene_so_mapping = ( # gene biotype => SO accession; trailing comments name the SO term
+	'protein_coding' 		=> 'SO:0001217', # protein_coding_gene
+	'pseudogene' 			=> 'SO:0000336', # pseudogene
+	'processed_transcript' 	=> 'SO:0001503', # processed_transcript
+	'lincRNA' 				=> 'SO:0001641', # lincRNA_gene
+	'polymorphic_pseudogene'=> 'SO:0000336', # pseudogene
+	'Mt_tRNA' 				=> 'SO:0000088', # mt_gene
+	'IG_D_gene' 			=> 'SO:0000510', # D_gene
+	'snoRNA' 				=> 'SO:0001267', # snoRNA_gene
+	'misc_RNA' 				=> 'SO:0000356', # RNA
+	'miRNA' 				=> 'SO:0001265', # miRNA_gene
+	'rRNA' 					=> 'SO:0001637', # rRNA_gene
+	'snRNA'					=> 'SO:0001268', # snRNA_gene
+	'snRNA_pseudogene'		=> 'SO:0000336', # pseudogene
+	'tRNA_pseudogene'		=> 'SO:0000778', # pseudogenic_tRNA
+	'rRNA_pseudogene'		=> 'SO:0000777', # pseudogenic_rRNA
+	'TR_J_gene'				=> 'SO:0000470', # J_gene
+	'TR_V_gene'				=> 'SO:0000466', # V_gene
+	'TR_C_gene'				=> 'SO:0000478', # C_gene
+	'ncRNA'					=> 'SO:0001263', # ncRNA_gene
+	'tRNA'					=> 'SO:0001272', # tRNA_gene
+	'retrotransposed'		=> 'SO:0000569', # retrotransposed
+## heavily abbreviated: a gene biotype missing here gets the generic per-class term from %feature_so_mapping
+	 );
+
+# Transcript biotype => SO accession; every biotype is listed exactly once.
+my %transcript_so_mapping = (
+	'processed_transcript'               => 'SO:0001503', # processed_transcript
+	'nonsense_mediated_decay'            => 'SO:0001621', # NMD_transcript_variant
+	'retained_intron'                    => 'SO:0000681', # aberrant_processed_transcript
+	'transcribed_unprocessed_pseudogene' => 'SO:0000516', # pseudogenic_transcript
+	'processed_pseudogene'               => 'SO:0000043', # processed_pseudogene
+	'unprocessed_pseudogene'             => 'SO:0000336', # pseudogene
+	'unitary_pseudogene'                 => 'SO:0000336', # pseudogene
+	'pseudogene'                         => 'SO:0000336', # pseudogene
+	'transcribed_processed_pseudogene'   => 'SO:0000043', # processed_pseudogene
+	'retrotransposed'                    => 'SO:0000569', # retrotransposed
+	'ncrna_host'                         => 'SO:0000483',
+	'polymorphic_pseudogene'             => 'SO:0000336', # pseudogene
+	'lincRNA'                            => 'SO:0001463',
+	'3prime_overlapping_ncrna'           => 'SO:0000483',
+	'TR_V_gene'                          => 'SO:0000466', # V_gene
+	'TR_V_pseudogene'                    => 'SO:0000336', # pseudogene
+
+	'TR_J_gene'                          => 'SO:0000470', # J_gene
+	'IG_C_gene'                          => 'SO:0000478', # C_gene
+	'IG_C_pseudogene'                    => 'SO:0000336', # pseudogene
+	'TR_C_gene'                          => 'SO:0000478', # C_gene
+	'IG_J_pseudogene'                    => 'SO:0000336', # pseudogene
+	'miRNA'                              => 'SO:0000276', # miRNA
+	'miRNA_pseudogene'                   => 'SO:0000336', # pseudogene
+	'disrupted_domain'                   => 'SO:0000681', # aberrant_processed_transcript
+	'rRNA'                               => 'SO:0000252', # rRNA
+	'rRNA_pseudogene'                    => 'SO:0000777', # pseudogenic_rRNA
+	'scRNA_pseudogene'                   => 'SO:0000336', # pseudogene
+	'snoRNA'                             => 'SO:0000275', # snoRNA
+	'snoRNA_pseudogene'                  => 'SO:0000336', # pseudogene
+	'snRNA'                              => 'SO:0000274', # snRNA
+	'snRNA_pseudogene'                   => 'SO:0000336', # pseudogene
+
+	);
+
+my %feature_so_mapping = ( # Perl class of the feature => generic SO accession, used when no biotype-specific mapping applies
+	'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene
+	'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript
+	'Bio::EnsEMBL::Slice' => 'SO:0000001', # region
+	'Bio::EnsEMBL::Variation::VariationFeature' => 'SO:0001060', # sequence variant
+	'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'SO:0001537', # structural variant
+    'Bio::EnsEMBL::Compara::ConstrainedElement' => 'SO:0001009', # DNA_constraint_sequence -- NOTE(review): marked "????" by the author, mapping unconfirmed
+	'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'SO:0001679', # transcription_regulatory_region
+);
+
+=head2 new
+
+    Constructor
+    Arg [1]    : OntologyAdaptor from the EnsEMBL registry
+	Returntype : Bio::EnsEMBL::Utils::BiotypeMapper
+
+=cut
+
+sub new {
+	# Invoked as Class->new($ontology_adaptor). The adaptor is stored as-is
+	# under the 'ontology_adaptor' key; it is not validated here.
+	my ($class, $adaptor) = @_;
+
+	my %fields = ( ontology_adaptor => $adaptor );
+	my $mapper = bless \%fields, $class;
+	return $mapper;
+}
+
+=head2 translate_feature_to_SO_term
+
+	Arg [0]    : Bio::EnsEMBL::Feature, subclass or related Storable
+	Description: Translates a Feature type into an SO term. If the Feature is a
+	             Gene or Transcript, then a further refinement of the type is made
+				 via Biotype
+	Returntype : String
+
+=cut
+
+sub translate_feature_to_SO_term {
+	my ($self, $feature) = @_;
+	my $feature_class = ref($feature);
+	my $so_accession;
+
+	# Genes and Transcripts are refined by biotype when a mapping exists;
+	# everything else (or an unmapped biotype) gets the generic per-class term.
+	if ($feature_class eq "Bio::EnsEMBL::Gene" and exists $gene_so_mapping{$feature->biotype}) {
+		$so_accession = $gene_so_mapping{$feature->biotype};
+	}
+	elsif ($feature_class eq "Bio::EnsEMBL::Transcript" and exists $transcript_so_mapping{$feature->biotype}) {
+		$so_accession = $transcript_so_mapping{$feature->biotype};
+	}
+	else {
+		$so_accession = $feature_so_mapping{$feature_class};
+	}
+
+	if (!defined($so_accession)) {
+		carp "Ontology mapping not found for ".$feature_class."\n";
+		return "????????";
+	}
+	my $so_term = $self->{'ontology_adaptor'}->fetch_by_accession($so_accession);
+	return $so_term->name;
+}
+
+
+=head2 translate_SO_to_biotype
+
+	Arg [0]    : Sequence Ontology term, either as a term name or as an accession (SO:nnnnnnn)
+	Description: Returns the closest corresponding Ensembl biotypes to a given SO term
+	Returntype : String containing a comma-separated list of biotypes
+=cut
+
+sub translate_SO_to_biotype {
+	my ($self, $translate_me) = @_;
+
+	# Step 1: resolve the argument to a list of SO accessions. Anything that
+	# does not start with "SO:" is treated as a term name and looked up in
+	# the ontology database, which may yield several terms.
+	# The list must start out truly empty: assigning [] to it would add an
+	# array reference as a bogus first "accession".
+	my @so_accessions;
+	if ($translate_me !~ /^SO:/) {
+		my $so_terms = $self->{'ontology_adaptor'}->fetch_all_by_name($translate_me);
+		foreach my $term (@{$so_terms}) {
+			push @so_accessions, $term->accession();
+		}
+	}
+	else {
+		push @so_accessions, $translate_me;
+	}
+
+	# Step 2: reverse lookup. Collect every key of the three mapping tables
+	# whose value equals one of the accessions. Tables are consulted in the
+	# order gene, transcript, feature; the feature table contributes class
+	# names rather than biotypes, and a key present in two tables is
+	# reported twice.
+	my @biotypes;
+	foreach my $accession (@so_accessions) {
+		foreach my $mapping (\%gene_so_mapping, \%transcript_so_mapping, \%feature_so_mapping) {
+			foreach my $key (keys %{$mapping}) {
+				if ($mapping->{$key} eq $accession) {
+					push @biotypes, $key;
+				}
+			}
+		}
+	}
+
+	# Comma-separated string; empty when nothing matched.
+	return join (',',@biotypes);
+}
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
new file mode 100644
index 0000000000..345564f64b
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
@@ -0,0 +1,214 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 NAME
+
+GFFSerializer - Feature to GFF converter
+
+=head1 AUTHOR
+
+Kieron Taylor, 2011 - ktaylor@ebi.ac.uk
+
+=head1 SYNOPSIS
+
+use Bio::EnsEMBL::Utils::IO::GFFSerializer;
+use Bio::EnsEMBL::Utils::BiotypeMapper;
+
+my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' );
+my $biotype_mapper = Bio::EnsEMBL::Utils::BiotypeMapper->new($ontology_adaptor);
+my $serializer = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($biotype_mapper,$output_fh);
+
+my $variation_feature_adaptor = $registry->get_adaptor( $config{'species'}, 'variation', 'variationfeature' );
+$serializer->print_metadata("Variation Features:");
+my $iterator = $variation_feature_adaptor->fetch_Iterator_by_Slice($slice,undef,60000);
+$serializer->print_feature_Iterator($iterator);
+
+=head1 DESCRIPTION
+
+Subclass of Serializer that can turn a feature into a line for the GFF3 format. Requires
+a BiotypeMapper in order to translate biotypes to SO terms.
+
+=cut
+
+package Bio::EnsEMBL::Utils::IO::GFFSerializer;
+use strict;
+use warnings;
+use Bio::EnsEMBL::Utils::Exception;
+use Bio::EnsEMBL::Utils::BiotypeMapper;
+use URI::Escape;
+use Bio::EnsEMBL::Utils::IO::Serializer;
+
+use base qw(Bio::EnsEMBL::Utils::IO::Serializer);
+
+my %strand_conversion = ( '1' => '+', '0' => '?', '-1' => '-'); # summary 'strand' value => symbol written to GFF column 7
+
+=head2 new
+
+	Constructor
+	Arg [1]    : BiotypeMapper
+	Arg [2]    : Optional File handle
+	
+	Returntype : Bio::EnsEMBL::Utils::IO::GFFSerializer
+
+=cut
+
+sub new {
+	my ($class, $mapper, $filehandle) = @_;
+	my $self = bless { mapper => $mapper, filehandle => $filehandle }, $class;
+
+	# Inspect the stored hash entry directly: no mapper() accessor is defined
+	# for this class, so calling one as a method dies before the check runs.
+	if ( ref($self->{'mapper'}) ne "Bio::EnsEMBL::Utils::BiotypeMapper" ) {
+		throw("GFF format requires an instance of Bio::EnsEMBL::Utils::BiotypeMapper to function");
+	}
+
+	if (!defined ($self->{'filehandle'})) {
+		# no file handle, let the handle point to a copy of STDOUT instead;
+		# the 'stdout' flag records that the copy was opened here
+		open($self->{'filehandle'}, '>&', \*STDOUT) or throw("Unable to duplicate STDOUT: $!");
+		$self->{'stdout'} = 1;
+	}
+	return $self;
+}
+
+=head2 print_feature
+
+	Arg [1]    : Bio::EnsEMBL::Feature, subclass or related pseudo-feature
+	Example    : $reporter->print_feature($feature)
+	Description: Asks a feature for its summary, and generates a GFF3 compliant entry to hand back again
+	             Additional attributes are handed through to column 9 of the output using exact spelling
+				 and capitalisation of the feature-supplied hash.
+	Returntype : none
+=cut
+
+sub print_feature {
+	my $self = shift;
+	my $feature = shift;
+	my $biotype_mapper = $self->{'mapper'};
+	# $text_buffer stays empty (nothing is written) when the feature cannot summarise itself
+	my $text_buffer = "";
+	if ($feature->can('summary_as_hash') ) {
+		my %summary = %{$feature->summary_as_hash};
+		my $row = "";
+#	Column 1 - seqname, the name of the sequence/chromosome the feature is on. Landmark for start below
+		if (!defined($summary{'seq_region_name'})) {$summary{'seq_region_name'} = "?";}
+		$row .= $summary{'seq_region_name'}."\t";
+
+#	Column 2 - source, complicated with Ensembl not being the originator of all data
+		$row .= "EnsEMBL\t";
+
+#   Column 3 - feature, the ontology term for the kind of feature this row is
+		my $so_term = $biotype_mapper->translate_feature_to_SO_term($feature);
+		$row .= $so_term."\t";
+
+#	Column 4 - start, the start coordinate of the feature, here shifted to chromosomal coordinates
+#	Start and end must be in ascending order for GFF. Circular genomes require the length of 
+#   the circuit to be added on.
+		if ($summary{'start'} > $summary{'end'}) {
+			#assumes this is not a Compara circular sequence and can treat is as a Feature
+			if ($feature->slice() && $feature->slice()->is_circular() ) {
+				$summary{'end'} = $summary{'end'} + $feature->seq_region_length;
+			}
+			# non-circular, but end still before start
+			else {$summary{'end'} = $summary{'start'};}
+		}
+		$row .= $summary{'start'} . "\t";
+
+#	Column 5 - end, coordinates (absolute) for the end of this feature
+		$row .= $summary{'end'} . "\t";
+
+#	Column 6 - score, for variations only.
+		if (exists($summary{'score'})) {
+			$row .= $summary{'score'}."\t";
+		}
+		else {
+			$row .= ".\t";
+		}
+
+#	Column 7 - strand, up or down
+		if (exists($summary{'strand'})) {
+			$row .= $strand_conversion{$summary{'strand'}}."\t";
+		}
+		else {
+			$row .= ".\t";
+		}
+
+#   Column 8 - reading frame, necessary only for Exons
+		$row .= ".\t";
+
+#	Column 9 - the 'other' section for all GFF and GVF compliant attributes
+#	Keys already written to columns 1-8 are dropped first so they are not repeated here
+		delete $summary{'seq_region_start'};
+		delete $summary{'seq_region_name'};
+		delete $summary{'start'};
+		delete $summary{'end'};
+		delete $summary{'strand'};
+		delete $summary{'score'};
+#   Slice the hash for specific keys in GFF-friendly order; false values ("", 0, undef) are skipped
+		my @ordered_keys = qw(ID Name Alias Parent Target Gap Derives_from Note Dbxref Ontology_term Is_circular);
+		my @ordered_values = @summary{@ordered_keys};
+		while (my $key = shift @ordered_keys) {
+			my $value = shift @ordered_values;
+			if ($value) {
+				$row .= $key."=".uri_escape($value,'\t\n\r;=%&,').";";
+			}
+			delete $summary{$key};
+		}
+#   Catch the remaining keys, in sorted order so that output is reproducible between runs
+		foreach my $attribute ( sort keys(%summary)) {
+			if (ref $summary{$attribute} eq "ARRAY") {
+				# escape each element on its own; the joining commas must stay literal
+				$row .= $attribute."=".join (',',map { uri_escape($_,'\t\n\r;=%&,') } @{$summary{$attribute}}) . ";"
+			}
+			else {
+				if ($summary{$attribute}) { $row .= $attribute."=".uri_escape($summary{$attribute},'\t\n\r;=%&,') . ";"; }
+			}
+		}
+		$text_buffer .= $row."\n"; # every attribute, including the last, keeps its ';' terminator
+	}
+	else {
+		warning("Feature failed to self-summarise");
+	}
+	#filehandle is inherited
+	my $fh = $self->{'filehandle'};
+	print $fh $text_buffer;
+}
+
+=head2 print_main_header
+
+    Arg [1]    : Arrayref of slices going into the file.
+	Description: Printing the header text or metadata required for GFF,
+		         using a list of slices to be written
+    Returntype : None
+=cut
+
+sub print_main_header {
+	my ($self, $arrayref_of_slices) = @_;
+	my $out = $self->{'filehandle'};
+
+	# Version pragma first, then one sequence-region line per slice.
+	print {$out} "##gff-version 3\n";
+	for my $region (@$arrayref_of_slices) {
+		unless (defined $region) { warning("Slice not defined.\n"); return; }
+		print {$out} "##sequence-region   ", $region->seq_region_name, " ", $region->start, " ", $region->end, "\n";
+	}
+}
+
+sub print_metadata {
+	# Writes $text as a "# " comment line, preceded by an empty line.
+	my ($self, $text) = @_;
+	my $out = $self->{'filehandle'};
+	print {$out} "\n# ".$text."\n";
+}
+
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/IO/ReportSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/ReportSerializer.pm
new file mode 100644
index 0000000000..ae400328c9
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/IO/ReportSerializer.pm
@@ -0,0 +1,209 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+  http://www.ensembl.org/info/about/code_licence.html
+
+=head1 NAME
+
+Report Serializer - generating textual summary reports
+
+=head1 AUTHOR
+
+Kieron Taylor, 2011 - ktaylor@ebi.ac.uk
+
+=head1 SYNOPSIS
+
+	use Bio::EnsEMBL::Registry;
+	use Bio::EnsEMBL::Utils::IO::ReportSerializer;
+	use IO::File;
+	
+	my $registry = 'Bio::EnsEMBL::Registry';
+	$output_fh = IO::File->new($config{'output'},'w') or die;
+	$serializer = Bio::EnsEMBL::Utils::IO::ReportSerializer->new($output_fh);
+	my $slice_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Slice' );
+	my $slice = $slice_adaptor->fetch_by_toplevel_location("6:1000000..1500000");
+	
+	$serializer->print_section_header($slice);
+	$serializer->print_feature_list($slice->get_all_Genes);
+
+=head1 DESCRIPTION
+
+Subclass of Serializer that can turn a feature into a text block
+Unsuited to very large slices, because it requires a select-all approach for features.
+
+=cut
+
+package Bio::EnsEMBL::Utils::IO::ReportSerializer;
+use strict;
+use warnings;
+use Bio::EnsEMBL::Utils::Exception;
+use URI::Escape;
+use Bio::EnsEMBL::Utils::IO::Serializer;
+
+use base qw(Bio::EnsEMBL::Utils::IO::Serializer);
+
+my %strand_conversion = ( '1' => '+', '0' => '?', '-1' => '-'); # NOTE(review): no use of this table is visible in this module
+
+my %feature_conversion = ( 	'Bio::EnsEMBL::Gene' => 'Gene', # feature class => noun used in the "There are N ..." headline
+							'Bio::EnsEMBL::Transcript' => 'Transcript',
+							'Bio::EnsEMBL::Translation' => 'Translation',
+							'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'Structural Variation',
+							'Bio::EnsEMBL::Variation::VariationFeature' => 'Variation',
+							'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'Regulatory Feature',
+							'Bio::EnsEMBL::Compara::ConstrainedElement' => 'Constrained Element',
+);
+
+# Per feature class, the summary-hash keys that are printed (in this order) when no dedicated
+# layout exists: used by print_feature and by the fallback branch of print_feature_list.
+my %printables = ( 
+					'Bio::EnsEMBL::Gene' => ['ID','biotype','start','end'],
+					'Bio::EnsEMBL::Transcript' => ['ID','start','end'],
+					'Bio::EnsEMBL::Translation' => ['ID'],
+					'Bio::EnsEMBL::Variation::VariationFeature' => ['ID','start','end','strand','seq_region_name'],
+					'Bio::EnsEMBL::Variation::StructuralVariationFeature' => ['ID','start','end','strand','seq_region_name'],
+					'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => ['ID','start','end','strand'],
+					'Bio::EnsEMBL::Compara::ConstrainedElement' => ['ID','start','end','strand','seq_region_name'],
+				);
+
+=head2 print_feature
+
+    Arg [1]    : Bio::EnsEMBL::Feature, subclass or related pseudo-feature
+    Example    : $reporter->print_feature($feature)
+=cut
+
+sub print_feature {
+	# Prints the %printables fields of $feature as one comma-separated line; unknown classes only warn.
+	my ($self, $feature) = @_;
+	my $wanted_keys = $printables{ref($feature)};
+	if (!$feature->can('summary_as_hash')) {
+		warning("Feature failed to self-summarise");
+	}
+	elsif (!defined $wanted_keys) {
+		warning("No report fields configured for ".ref($feature));
+	}
+	else {
+		my %summary = %{$feature->summary_as_hash};
+		print { $self->{'filehandle'} } join(',', @summary{ @{$wanted_keys} })."\n";
+	}
+}
+
+=head2 print_feature_list
+
+    Arg [1]    : Listref of Bio::EnsEMBL::Feature, subclass or related pseudo-feature
+	Description: Relies on a list of similar features to print in a block together.
+	             Overrides superclass method
+	             Results are truncated after the first 100 features for brevity.
+    Example    : $reporter->print_feature_list(\@features);
+=cut
+
+sub print_feature_list {
+    my $self = shift;
+	my $feature_list = shift;
+	my $fh = $self->{'filehandle'};
+
+	my $example_feature = $feature_list->[0];
+	my $feature_type = ref($example_feature);
+	my $feature_count = 0;
+	unless (defined $feature_type) {$feature_type = "Feature"};
+	print $fh "There are ",scalar(@$feature_list)," ",$feature_conversion{$feature_type},(scalar(@$feature_list) != 1) ? "s":""," in this region\n";
+	if (scalar(@$feature_list) > 100 ) { print $fh "Too many to display, results truncated to the first 100\n";}
+	print $fh "\n";
+	foreach my $feature (@$feature_list) {
+		$feature_count++;
+		my %attributes = %{$feature->summary_as_hash};
+		
+		if ($feature_count == 100) {last;}
+		# Begin the feature-specific formatting code
+		if ($feature_type eq "Bio::EnsEMBL::Gene") {
+			print $fh "\tGene ".$feature_count.": ".$attributes{'external_name'}.",".$attributes{'ID'}."\n";
+			print $fh "\tBiotype: ".$attributes{'biotype'}."\n";
+			print $fh "\tLocation: ".$attributes{'start'}."-".$attributes{'end'}." bp\n\n";
+			
+			print $fh "\tTranscripts and proteins\n";
+			foreach my $transcript (@{$feature->get_all_Transcripts}) {
+				my %tr_summary = %{$transcript->summary_as_hash};
+				print $fh "\t\t ".$tr_summary{'ID'};
+				my $translation = $transcript->translation;
+				if (defined $translation) {
+					my %pr_summary = %{$translation->summary_as_hash};
+					print $fh " - ".$pr_summary{'ID'}."\n\n";
+				}
+				else {print $fh " - no protein\n\n";}
+			}
+			print $fh "\n";
+		}
+		elsif ($feature_type eq "Bio::EnsEMBL::Funcgen::RegulatoryFeature") {
+			print $fh "\t".$attributes{'ID'}."\n";
+		}
+		elsif ($feature_type eq "Bio::EnsEMBL::Compara::ConstrainedElement") {
+			print $fh "\t".$attributes{'start'}."-".$attributes{'end'}."\n";
+		} 
+		elsif ( $feature_type eq "Bio::EnsEMBL::Variation::StructuralVariationFeature" 
+			or $feature_type eq "Bio::EnsEMBL::Variation::VariationFeature") {
+			print $fh "\tID: ".$attributes{'ID'}."  Position: ".
+				$attributes{'start'}."-".$attributes{'end'}." on strand ".$attributes{'strand'}." \n";
+		}
+		else {
+			# slice favourite values out unformatted.
+			my @values = @attributes{ @{$printables{$feature_type}} };
+			print $fh $feature_type.join(',',@values)."\n";
+			
+		}
+	}
+}
+
+# Just print individuals without awareness of list size and position.
+sub print_feature_iterator {
+	my ($self, $feature_iterator) = @_;
+
+	# Drain the iterator, handing each feature to print_feature one at a time.
+	until (!$feature_iterator->has_next) {
+		$self->print_feature(scalar $feature_iterator->next);
+	}
+}
+
+=head2 print_main_header
+
+    Arg [1]    : Arrayref of slices going into the file.
+	Description: Printing the header text for this report
+                 Requires a slice list in order to report how many will be printed
+    Returntype : None
+=cut
+
+sub print_main_header {
+	my ($self, $arrayref_of_slices) = @_;
+	my $out = $self->{'filehandle'};
+	my $region_total = @{$arrayref_of_slices};
+
+	# The headline is pluralised only when more than one region is reported.
+	print {$out} "Report for $region_total region";
+	print {$out} "s" if $region_total > 1;
+	print {$out} "\n\n";
+}
+
+
+=head2 print_section_header 
+
+	Arg [1]    : Bio::EnsEMBL::Slice
+	Description: Prints a summary of the slice
+	             Intended to be used prior to print_feature_list()
+	Returntype : None
+
+=cut
+
+sub print_section_header {
+	my $self = shift;
+	my $slice = shift;
+	my $fh = $self->{'filehandle'};
+	# One line of the form "  Region: <name> <start>-<end> bp", followed by an empty line.
+	print $fh "  Region: ",$slice->seq_region_name," ",$slice->start,"-",$slice->end," bp\n\n";
+}
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/IO/Serializer.pm b/modules/Bio/EnsEMBL/Utils/IO/Serializer.pm
new file mode 100644
index 0000000000..2dff8efdd9
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/IO/Serializer.pm
@@ -0,0 +1,160 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+  http://www.ensembl.org/info/about/code_licence.html
+
+=head1 NAME
+
+Serializer - An abstract serializer for turning EnsEMBL data into other formats
+
+=head1 AUTHOR
+
+Kieron Taylor, 2011 - ktaylor@ebi.ac.uk
+
+=head1 SYNOPSIS
+
+my $serializer = Bio::EnsEMBL::Utils::IO::Serializer->new( $filehandle );
+$serializer->print_feature_list( \@list_of_features );
+
+=head1 DESCRIPTION
+
+Subclass this class to create a format-specific serializer.
+Be sure to implement print_feature at the bare minimum
+
+=cut
+
+package Bio::EnsEMBL::Utils::IO::Serializer;
+use strict;
+use warnings;
+use Bio::EnsEMBL::Utils::Exception;
+use Bio::EnsEMBL::Utils::SeqDumper;
+
+
+=head2 new
+
+	Constructor
+	Arg [1]    : Optional File handle
+	Returntype : Bio::EnsEMBL::Utils::IO::Serializer
+
+=cut
+
+sub new {
+	my ($class, $filehandle) = @_;
+	my $self = bless { 'filehandle' => $filehandle }, $class;
+
+	if (!defined ($self->{'filehandle'})) {
+		# No handle supplied: write to a duplicate of STDOUT. The 'stdout'
+		# flag records that the duplicate was opened here, so DESTROY closes it.
+		open($self->{'filehandle'}, '>&', \*STDOUT)
+			or throw("Unable to duplicate STDOUT: $!");
+		$self->{'stdout'} = 1;
+	}
+	return $self;
+}
+
+=head2 DESTROY
+
+	Destructor
+	Description: Restores default state of the STDOUT filehandle as it is a copy
+	             and may not flush correctly.
+=cut
+
+sub DESTROY {
+	# Only a handle flagged with 'stdout' (the STDOUT copy made by new) is
+	# closed here; caller-supplied handles are left untouched.
+	my ($self) = @_;
+	close $self->{'filehandle'} if $self->{'stdout'};
+}
+
+sub print_feature {
+	throw( "print_feature method not implemented."); # abstract hook: the base class has no output format, subclasses must override
+}
+
+=head2 print_feature_list
+
+	Arg [1]    : Listref of features
+	Description: Run print_feature on every feature in the list
+
+=cut
+
+sub print_feature_list {
+	my ($self, $feature_list) = @_;
+
+	# Only a plain array reference is accepted; anything else is an error.
+	if (ref($feature_list) ne 'ARRAY') {
+		throw( "print_feature_list requires a listref as argument" );
+	}
+	else {
+		# Hand the features to print_feature one by one, in list order.
+		for my $item (@{$feature_list}) { $self->print_feature($item); }
+	}
+}
+
+=head2 print_feature_Iterator
+
+	Arg [1]    : Bio::EnsEMBL::Utils::Iterator
+	Description: Automatically spools through an iterator for convenience
+	Returntype : None
+=cut
+
+sub print_feature_Iterator {
+	my ($self, $iterator) = @_;
+	# has_next is probed only as a duck-type check; each() does the traversal.
+	# The callback assumes each() exposes the current element in $_.
+	my $emit = sub { $self->print_feature($_) };
+	if (!$iterator->can('has_next')) {
+		throw("Supplied iterator does not look like Bio::EnsEMBL::Utils::Iterator");
+	}
+	else { $iterator->each($emit); }
+}
+
+=head2 print_metadata 
+	
+	Arg [1]    : String
+	Description: Pipes a custom string into the filehandle that the serializer is using
+
+=cut
+
+sub print_metadata {
+	# Emits $text on a line of its own, preceded by an empty line.
+	my ($self, $text) = @_;
+	my $out = $self->{'filehandle'};
+	print {$out} "\n".$text."\n";
+}
+
+=head2 print_main_header
+
+	Arg [1]    : Arrayref of slices going into the file.
+	Description: Printing the header text or metadata required for this file format,
+	             Re-implement in the serializer.
+	Returntype : None
+=cut
+
+sub print_main_header {
+	# Placeholder for formats without a header: any slice list passed in is
+	# ignored and a warning is raised instead. Format-specific serializers
+	# supply their own version of this method.
+	warning("No writer for headers in this format. Nothing done" );
+}
+
+=head2 print_sequence 
+	Arg [1]    : Bio::EnsEMBL::Slice
+	Description: By default, prints a block of FASTA format sequence from the given slice
+=cut
+
+sub print_sequence {
+	my ($self, $slice) = @_;
+	# The ##FASTA marker goes to the serializer's own handle, the same one the sequence is dumped to.
+	print { $self->{'filehandle'} } "##FASTA\n";
+	Bio::EnsEMBL::Utils::SeqDumper->dump( $slice, 'FASTA', $self->{'filehandle'});
+}
+
+
+1;
-- 
GitLab