diff --git a/misc-scripts/db/cleanup_tmp_tables.pl b/misc-scripts/db/cleanup_tmp_tables.pl
new file mode 100755
index 0000000000000000000000000000000000000000..362e5d0576c25bdb613a130440c3e085aa129968
--- /dev/null
+++ b/misc-scripts/db/cleanup_tmp_tables.pl
@@ -0,0 +1,128 @@
+#!/usr/local/bin/perl
+
+=head1 NAME
+
+cleanup_tmp_tables.pl - delete temporary and backup tables from a database
+
+=head1 SYNOPSIS
+
+cleanup_tmp_tables.pl [arguments]
+
+Required arguments:
+
+  --dbname, db_name=NAME              database name NAME
+  --host, --dbhost, --db_host=HOST    database host HOST
+  --port, --dbport, --db_port=PORT    database port PORT
+  --user, --dbuser, --db_user=USER    database username USER
+  --pass, --dbpass, --db_pass=PASS    database passwort PASS
+
+Optional arguments:
+
+  --conffile, --conf=FILE             read parameters from FILE
+                                      (default: conf/Conversion.ini)
+
+  --logfile, --log=FILE               log to FILE (default: *STDOUT)
+  --logpath=PATH                      write logfile to PATH (default: .)
+  --logappend, --log_append           append to logfile (default: truncate)
+
+  -v, --verbose=0|1                   verbose logging (default: false)
+  -i, --interactive=0|1               run script interactively (default: true)
+  -n, --dry_run, --dry=0|1            don't write results to database
+  -h, --help, -?                      print help (this message)
+
+=head1 DESCRIPTION
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use FindBin qw($Bin);
+use vars qw($SERVERROOT);
+
+BEGIN {
+    $SERVERROOT = "$Bin/../..";
+    unshift(@INC, "$SERVERROOT/ensembl/modules");
+    unshift(@INC, "$SERVERROOT/bioperl-live");
+}
+
+use Getopt::Long;
+use Pod::Usage;
+use Bio::EnsEMBL::Utils::ConversionSupport;
+
+$| = 1;
+
+my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
+
+# parse options
+$support->parse_common_options(@_);
+$support->allowed_params(
+  $support->get_common_params,
+);
+
+if ($support->param('help') or $support->error) {
+    warn $support->error if $support->error;
+    pod2usage(1);
+}
+
+# ask user to confirm parameters to proceed
+$support->confirm_params;
+
+# get log filehandle and print heading and parameters to logfile
+$support->init_log;
+
+$support->check_required_params;
+
+# connect to database and get adaptors
+my $dba = $support->get_database('ensembl');
+my $dbh = $dba->dbc->db_handle;
+
+# find all temporary and backup tables
+my @tables;
+
+foreach my $pattern (qw(tmp temp bak backup)) {
+  my $sql = qq(SHOW TABLES LIKE '\%$pattern\%');
+  my $sth = $dbh->prepare($sql);
+  $sth->execute;
+  while (my ($table) = $sth->fetchrow_array) {
+    push @tables, $table;
+  }
+}
+
+if ($support->param('dry_run')) {
+  # for a dry run, only show which databases would be deleted
+  $support->log("Temporary and backup tables found:\n");
+  foreach my $table (sort @tables) {
+    $support->log("$table\n", 1);
+  }
+
+} else {
+  # delete tables
+  foreach my $table (sort @tables) {
+    if ($support->user_proceed("Drop table $table?")) {
+      $support->log("Dropping table $table...\n");
+      $dbh->do("DROP TABLE $table");
+      $support->log("Done.\n");
+    }
+  }
+}
+
+# finish logfile
+$support->finish_log;
+
diff --git a/misc-scripts/utilities/dna_compress.pl b/misc-scripts/utilities/dna_compress.pl
new file mode 100755
index 0000000000000000000000000000000000000000..3dcd5104f012e574e7944698a32b7229822c8456
--- /dev/null
+++ b/misc-scripts/utilities/dna_compress.pl
@@ -0,0 +1,294 @@
+#!/usr/local/bin/perl --    # -*-Perl-*-
+#
+# Copyright (c) 2003 Tim Hubbard (th@sanger.ac.uk)
+# Sanger Centre, Wellcome Trust Genome Campus, Cambs, UK
+# All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation
+#
+# $Header$
+
+# This is a driver script to populate and test the experimental dnac
+# (compressed dna) table of ensembl.  Use -T for pod based tutorial
+
+use strict;
+use Getopt::Std;
+use vars qw($opt_H $opt_T
+	    $opt_u $opt_d $opt_P $opt_h $opt_p $opt_C $opt_U);
+
+use Bio::EnsEMBL::DBSQL::DBAdaptor;
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+
+getopts("u:HTd:p:P:h:CU");
+
+$|=1;
+
+# specify defaults
+my $def_u='ensadmin';
+my $def_P='3306';
+my $def_h='127.0.0.1';
+
+if($opt_H){
+    &help;
+}elsif($opt_T){
+    &help2;
+}
+
+sub help {
+    print <<ENDHELP;
+
+usage:
+dna_compress.pl [options]
+
+  -H       for help
+  -T       tutorial
+  -d       database
+  -u user  ensembl db user [$def_u]
+  -d db    ensembl db
+  -P port  port of mysql   [$def_P]
+  -h host  host of mysql   [$def_h]
+  -p pass  passwd for mysqluser
+  -C       compress
+  -U       uncompress
+ENDHELP
+
+    exit 0;
+}
+
+sub help2 {
+    exec('perldoc', $0);
+}
+
+# defaults or options
+$opt_u ||= $def_u;
+$opt_h ||= $def_h;
+$opt_P ||= $def_P unless $opt_P;
+
+
+if(!$opt_d) {
+  print STDERR "ensembl db (-d) argument is required\n";
+  help();
+}
+
+if(!$opt_C && !$opt_U) {
+  print STDERR "either compress (-C) or uncompress (-U) is required\n";
+  help();
+}
+
+
+# db connection
+my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host   => $opt_h,
+                                            -user   => $opt_u,
+                                            -port   => $opt_P,
+                                            -dbname => $opt_d,
+                                            -pass   => $opt_p);
+
+
+
+my $meta_container = $db->get_MetaContainer();
+my $comp_seq_adaptor = $db->get_CompressedSequenceAdaptor();
+
+my ($compressed) = 
+  @{$meta_container->list_value_by_key('sequence.compression')};
+
+if($opt_C) {
+  #compress the dna in the database
+  if($compressed) {
+    throw("The database meta table indicates this database already contains"
+     ." sequence compression.\nIf this is not the case do the following:\n"
+     ."   mysql>  delete from meta where meta_key = 'sequence.compression'\n");
+  }
+
+  #one more sanity check - make sure that dna table is populated and dnac table
+  #is not
+  if(get_dna_count($db) == 0) {
+    print STDERR "There is no uncompressed dna in this database to compress.";
+    exit(0);
+  }
+  if(get_dnac_count($db) != 0) {
+    throw("Unexpected: the dnac table already contains some rows.\n");
+  }
+  
+  print STDERR "Compressing sequence and writing to dnac table.\n";
+
+  #get every slice that contains sequence
+  my $slice_adaptor = $db->get_SliceAdaptor();
+  foreach my $slice (@{$slice_adaptor->fetch_all('seqlevel')}) {
+    my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
+    $comp_seq_adaptor->store($seq_region_id, $slice->seq);
+    print STDERR ".";
+  }
+
+  print STDERR "\nSetting sequence.compression meta table entry.\n";
+  $meta_container->delete_key('sequence.compression');
+  $meta_container->store_key_value('sequence.compression', 1);
+
+  print STDERR "Deleting uncompressed sequence.\n";
+  my $sth = $db->prepare("delete from dna");
+  $sth->execute();
+
+  print STDERR "All done.\n";
+
+} else {
+  #uncompress the dna in the database
+  if(!$compressed) {
+    throw("The database meta table indicates that the sequence in this "
+     ." database is already uncompressed.\nIf this is not the case do "
+     ." the following:\n"
+     ."   mysql>  delete from meta where meta_key = 'sequence.compression';\n"
+     ."   mysql>  insert into meta (meta_key,meta_value) "
+     . "values('sequence.compression', 1);\n");
+  }
+
+  if(get_dnac_count($db) == 0) {
+    print STDERR "There is no compressed dna in this database to uncompress.";
+    exit(0);
+  }
+  if(get_dna_count($db) != 0) {
+    throw("Unexpected: the dna table already contains some rows.\n");
+  }
+
+  print STDERR "Setting sequence.compression meta table entry.\n";
+  $meta_container->delete_key('sequence.compression');
+  $meta_container->store_key_value('sequence.compression', 0);
+
+  print STDERR "Uncompressing sequence and writing to dna table.\n";
+
+  #get every slice that contains sequence
+  my $slice_adaptor = $db->get_SliceAdaptor();
+  my $seq_adaptor   = $db->get_SequenceAdaptor();
+  
+  if($seq_adaptor->isa('CompressedSequenceAdaptor')) {
+    throw("Tried to get SequenceAdaptor but got CompressedSequenceAdaptor.");
+  }
+
+  foreach my $slice (@{$slice_adaptor->fetch_all('seqlevel')}) {
+    my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
+    my $seq = $comp_seq_adaptor->fetch_by_Slice_start_end_strand($slice);
+    $seq_adaptor->store($seq_region_id, $$seq);
+    print STDERR ".";
+  }
+
+  print STDERR "\nDeleting compressed sequence.\n";
+  my $sth = $db->prepare("delete from dnac");
+  $sth->execute();
+
+  print STDERR "All done.\n";
+}
+
+
+sub get_dna_count {
+  my $db = shift;
+
+  my $sth = $db->prepare("SELECT count(*) from dna");
+  $sth->execute();
+  my ($count) = $sth->fetchrow_array();
+  $sth->finish();
+
+  return $count;
+}
+
+
+sub get_dnac_count {
+  my $db = shift;
+
+  my $sth = $db->prepare("SELECT count(*) from dnac");
+  $sth->execute();
+  my ($count) = $sth->fetchrow_array();
+  $sth->finish();
+  
+  return $count;
+}
+
+
+
+__END__
+
+=pod
+
+=head1 NAME - name
+
+dna_compress.pl
+
+=head1 DESCRIPTION
+
+Converts compressed sequence in an ensembl database to uncompressed sequence
+or converts uncompressed sequence in an ensembl database to compressed
+sequence.
+
+=head1 SYNOPSIS
+
+    
+
+=head1 EXAMPLES
+
+Compress the contents of the dna table in an ensembl database:
+
+    dna_compress.pl -C -u ensadmin -p password -h host -d homo_sapiens_core_18_34 
+
+Uncompress the contents of the dnac table in an ensembl database
+
+    dna_compress.pl -U -u ensadmin -p password -h host -d homo_sapiens_core_18_34 
+
+
+=head1 FLAGS
+
+=over 4
+
+=item -h
+
+Displays short help
+
+=item -T
+
+Displays this help message
+
+=item -C
+
+Compress the dna in the database
+
+=item -U
+
+Uncompress the dna in the database
+
+=item -h
+
+The database host
+
+=item -p
+
+The database password
+
+=item -P
+
+The database port
+
+=item -u
+
+The database user
+
+=back
+
+=head1 VERSION HISTORY
+
+=over 4
+
+=item 14-Jul-2003
+
+B<th> initial release
+
+=item 08-Oct-2003
+
+B<mcvicker> rewrote for new schema and newly implemented sequence compression
+
+=back
+
+=head1 BUGS
+
+=head1 AUTHOR
+
+B<Tim Hubbard> Email th@sanger.ac.uk
+
+=cut
diff --git a/misc-scripts/utilities/parse_embl.pl b/misc-scripts/utilities/parse_embl.pl
new file mode 100644
index 0000000000000000000000000000000000000000..e9d0f788d91c899d2a0ff3aecb1b1239296e9882
--- /dev/null
+++ b/misc-scripts/utilities/parse_embl.pl
@@ -0,0 +1,509 @@
+
+=head1 NAME
+
+parse_embl.pl
+
+=head1 SYNOPSIS
+
+ parse_embl -dbname <db name> [-host <db host>] [-port <db port>]
+  [-user <db user>] [-pass <db pass>]
+  [-genetype <gene type>]
+  -codontable <int> -emblfile <file name>
+
+=head1 DESCRIPTION
+
+This is a script originally written by Keith James to load a pathogen
+genome from an EMBL flat file into an Otter (modified ensembl) database.
+It has been has been stripped down and modified to make it more general
+purpose.
+
+This script will peform simple loading of an annotated genome in EMBL
+flat file format into an Ensembl database. The script loads a fairly
+minimal set of gene information from an EMBL file, but could be
+extended to do more.
+
+Certain assumptions are made about the format of the EMBL file such
+that it may require pre-processing before loading - these are listed
+below. A number of non-standard tags (EMBL qualifiers) are used. I
+recommend using BioPerl to write a suitable pre-processor.
+
+=over
+
+=item *
+
+There may be multiple chromosomes/plasmids per EMBL file.
+
+=item *
+
+Each chromosome/plasmid must have a source feature defining the
+chromosome/plasmid name and type. They must all have different
+names. See EMBL feature table definition for 'chromosome' and
+'plasmid' tags.
+
+=item *
+
+Protein coding genes are annotated as CDS features.
+
+=item *
+
+All gene synonyms are defined by multiple 'gene' and/or 'synonym' tags.
+
+=item *
+
+The frame of a CDS feature is defined by a 'codon_start' tag if it is
+not in frame 0.
+
+=item *
+
+If a CDS feature has a 'pseudo' tag it is taken to be a pseudogene.
+
+=item *
+
+If a CDS feature has a 'partial' tag but fuzzy ends are not defined in
+its location it is assumed the ends have not been found.
+
+=item *
+
+Non-standard translation tables are currently not handled.
+
+=item *
+
+Annotator remarks about genes are taken from 'note' tags of
+features. Remarks about transcripts are taken from 'product', 'class'
+and 'EC_number' tags of features.
+
+=back
+
+Redirect STDERR to a file to obtain a rough log of the loading
+operations.
+
+Some functions in the script are simply stubs which may be implemented
+later. Some user assembly may be required.
+
+=head1 AUTHOR - Keith James
+
+Email kdj@sanger.ac.uk
+
+=head1 CONTRIBUTORS
+
+Stephen Searle, sjms@sanger.ac.uk
+
+Graham McVicker
+
+=head1 METHODS
+
+=cut
+
+use strict;
+use Getopt::Long;
+
+use Bio::SeqIO;
+use Bio::Location::Simple;
+
+use Bio::EnsEMBL::DBSQL::DBAdaptor;
+
+use Bio::EnsEMBL::Exon;
+use Bio::EnsEMBL::Gene;
+use Bio::EnsEMBL::Slice;
+use Bio::EnsEMBL::CoordSystem;
+use Bio::EnsEMBL::Transcript;
+use Bio::EnsEMBL::Translation;
+use Bio::EnsEMBL::Analysis;
+
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+
+###
+### Script globals.
+###
+
+my $MAX_SEQUENCE_LEN = 2e6;
+my $CURATED_LOGIC_NAME = 'Genome Reviews';
+
+###
+### Command line options
+###
+my $emblfile      = undef;
+my $host          = '127.0.0.1';
+my $user          = undef;
+my $pass          = undef;
+my $port          = 3306;
+my $dbname        = undef;
+my $gene_type     = 'known';
+
+GetOptions('emblfile:s'     => \$emblfile,
+           'dbname:s'       => \$dbname,
+           'user:s'         => \$user,
+           'pass:s'         => \$pass,
+           'port:n'         => \$port,
+           'host:s'         => \$host,
+           'genetype:s'     => \$gene_type);
+
+throw("-dbname argument required") if(!defined($dbname));
+throw("-emblfile argument required") if(!defined($emblfile));
+
+###
+### Get Ensembl DB adaptor
+###
+my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host   => $host,
+                                             -user   => $user,
+                                             -pass   => $pass,
+                                             -port   => $port,
+                                             -dbname => $dbname);
+
+
+###
+### Create new analysis object indicating a curated origin
+###
+my $analysis = Bio::EnsEMBL::Analysis->new(-logic_name => $CURATED_LOGIC_NAME);
+
+###
+### Create sequence object and load it
+###
+my $seqi = Bio::SeqIO->new(-file   => $emblfile,
+                           -format => 'embl');
+###
+### Main loading loop
+###
+while (my $seq = $seqi->next_seq) {
+  # Split sequence and return a slice
+  my $slice = load_sequence($dba, $seq);
+
+  foreach my $f ($seq->get_SeqFeatures) {
+    if ($f->primary_tag =~ /CDS/) {
+      store_gene($dba, $f, $slice);
+    }
+
+    if ($f->primary_tag =~ /repeat/) {
+      # store_repeat($f, $slice);
+    }
+  }
+}
+
+
+=head2 store_gene
+
+ Title    : store_gene
+ Function : Stores a gene feature (both protein- and RNA-coding)
+ Returns  : void
+ Argument : Bio::SeqFeatureI, Bio::EnsEMBL::DBSQL::SliceAdaptor
+
+=cut
+
+sub store_gene {
+  my ($db, $f, $slice) = @_;
+
+  my $gene_stable_id = get_gene_stable_id($f);
+
+  my $gene = Bio::EnsEMBL::Gene->new();
+
+  $gene->analysis($analysis);
+  $gene->type($gene_type);
+  $gene->stable_id($gene_stable_id);
+  $gene->version(1);
+  $gene->slice($slice);
+
+  print STDERR sprintf("Found CDS with ID %s\n", $gene_stable_id);
+
+  my $tcount = 0;
+  my $transcript_id = sprintf("%s.%d", $gene_stable_id, $tcount++);
+
+  my $transcript = Bio::EnsEMBL::Transcript->new;
+  $transcript->stable_id($transcript_id);
+  $transcript->version(1);
+  $transcript->slice($slice);
+
+  $gene->add_Transcript($transcript);
+
+  # Add the exons
+  my @exons = create_exons($f);
+  my $ecount = 0;
+  foreach my $exon (@exons) {
+    $exon->stable_id(sprintf("%s.%d", $gene_stable_id, $ecount++));
+    $exon->version(1);
+    $exon->slice($slice);
+    $transcript->add_Exon($exon);
+  }
+
+  if ($f->primary_tag =~ /RNA/) {
+    foreach my $exon (@exons) {
+      $exon->phase(-1);
+      $exon->end_phase(-1);
+    }
+  }
+
+  # Handle protein CDS features
+  if ($f->primary_tag eq 'CDS') {
+    # Based on /codon_start EMBL qualifier
+    my $frame = get_initial_frame($f);
+
+    @exons = @{ $transcript->get_all_Exons };
+
+    # This code assumes no UTRs
+    my $translation = Bio::EnsEMBL::Translation->new;
+    my $rcount = 0;
+    $translation->stable_id(sprintf("%s.%d", $gene_stable_id, $rcount++));
+    $translation->version(1);
+    $translation->start_Exon($exons[0]);
+    $translation->start(1 + $frame);
+    $translation->end_Exon($exons[$#exons]);
+    $translation->end($translation->end_Exon->length);
+
+    set_exon_phases($translation, @exons);
+
+    foreach my $exon (@exons) {
+      print STDERR sprintf("Added exon start: %d end: %d strand: %d phase: %d\n",
+                           $exon->start, $exon->end, $exon->strand, $exon->phase);
+    }
+
+    $transcript->translation($translation);
+
+    my $mrna_seq =
+      Bio::Seq->new(-seq      => $transcript->translateable_seq,
+                    -moltype  => "dna",
+                    -alphabet => 'dna',
+                    -id       => $translation->stable_id);
+
+    # Translate args: stop char, unknown aa char, frame,
+    # table, full CDS, throw
+    my $aa_seq = $transcript->translate()->seq();
+
+    print STDERR sprintf("Translation is: %s\n", $aa_seq);
+
+    if ($aa_seq =~ /\*/) {
+      print STDERR sprintf("Failed translating %s after phase setting\n",
+                           $translation->stable_id);
+    }
+  }
+
+  eval {
+    $db->get_GeneAdaptor->store($gene);
+  };
+  throw(sprintf("Failed loading %s\n%s\n", $gene_stable_id, $@)) if($@);
+}
+
+
+
+=head2 get_gene_stable_id
+
+  Arg [1]    : Bio::SeqFeatureI $f
+  Example    : my $stable_id = get_gene_stable_id($f);
+  Description: Tries to get a sensible stable identifier from a bioperl feature
+               created from an embl flat file.
+  Returntype : string
+  Exceptions : throw if cannot determine stable identifier
+  Caller     : 
+
+=cut
+
+sub get_gene_stable_id {
+  my $f = shift;
+
+  my $stable_id;
+
+  if($f->has_tag('locus_tag')) {
+    my @vals = $f->get_tag_values('locus_tag');
+    ($stable_id) = split(/\s+/,$vals[0]);
+    return $stable_id if($stable_id);
+  }
+
+  if($f->has_tag('gene')) {
+    my @vals = $f->get_tag_values('gene');
+    ($stable_id) = split(/\s+/,$vals[0]);
+    return $stable_id if($stable_id);
+  }
+
+  throw("Could not determine gene identifier\n");
+}
+
+
+=head2 create_exons
+
+ Title    : create_exons
+ Function : Returns a list of exons created from the location of
+            feature.
+ Returns  : List of Bio::EnsEMBL::Exons
+ Argument : Bio::SeqFeatureI
+
+=cut
+
+sub create_exons {
+  my $f = shift;
+
+  my @exons;
+
+  foreach my $loc ($f->location->each_Location) {
+    push(@exons, Bio::EnsEMBL::Exon->new(-start  => $loc->start,
+                                         -end    => $loc->end,
+                                         -strand => $loc->strand));
+
+    print STDERR sprintf("Creating exon at %d..%d on strand %d\n",
+                         $loc->start, $loc->end, $loc->strand);
+  }
+
+  if ($f->has_tag('pseudo')) {
+    foreach my $exon (@exons) {
+      $exon->phase(-1);
+      $exon->end_phase(-1);
+    }
+  }
+  else {
+    foreach my $exon (@exons) {
+      $exon->end_phase(0);
+    }
+  }
+
+  return @exons;
+}
+
+=head2 get_initial_frame
+
+ Title    : get_initial_frame
+ Function : Returns the frame specified by the codon_start tag of
+            feature.
+ Returns  : int
+ Argument : Bio::SeqFeatureI
+
+=cut
+
+sub get_initial_frame {
+  my $f = shift;
+
+  my $frame = 0;
+
+  if ($f->has_tag('codon_start')) {
+    my @vals = $f->get_tag_values('codon_start');
+    $frame = $vals[0] - 1;
+  }
+
+  return $frame;
+}
+
+=head2 set_exon_phases
+
+ Title    : set_exon_phases
+ Function : Sets the start and end phases of exons.
+ Returns  : void
+ Argument : Bio::Otter::AnnotatedTranscript, Bio::Ensembl::Exons
+
+=cut
+
+sub set_exon_phases {
+  my $translation = shift;
+  my @exons = @_;
+
+  my $found_start = 0;
+  my $found_end   = 0;
+  my $phase       = 0;
+
+  foreach my $exon (@exons) {
+    # Internal and end exons
+    if ($found_start && ! $found_end) {
+      $exon->phase($phase);
+      $exon->end_phase(($exon->length + $exon->phase) % 3);
+      $phase = $exon->end_phase;
+    }
+
+    if ($translation->start_Exon == $exon) {
+      $exon->phase($phase);
+      $exon->end_phase((($exon->length - $translation->start + 1)
+                        + $exon->phase) % 3);
+      $phase = $exon->end_phase;
+      $found_start = 1;
+    }
+
+    if ($translation->end_Exon == $exon) {
+      $found_end = 1;
+    }
+  }
+}
+
+
+
+=head2 get_seq_region_data
+
+  Arg [1]    : Bio::SeqI
+  Example    : my ($csname, $seq_region_name) = get_seq_region_data($seq);
+  Description: Gets the coordinate system name (e.g. 'chromosome', 'plasmid')
+               and the name of the seq_region
+  Returntype : pair of strings
+  Exceptions : none
+  Caller     : load_sequence
+
+=cut
+
+sub get_seq_region_data {
+  my $seq = shift;
+  my $type;
+  my $name;
+
+  foreach my $f ($seq->get_SeqFeatures) {
+    if ($f->primary_tag eq 'source') {
+      if ($f->start == 1 and $f->end == $seq->length) {
+        if ($f->has_tag('chromosome')){
+          $type = 'chromosome';
+          my @vals = $f->get_tag_values('chromosome');
+          $name = $vals[0];
+          $name =~ s/chr(omosome)\s+//i; #strip off chromosome prefix if present
+          last;
+        }
+
+        if ($f->has_tag('plasmid')) {
+          $type = 'plasmid';
+          my @vals = $f->get_tag_values('plasmid');
+          $name = $vals[0];
+          $name =~ s/plasmid\s+//i; #strip off plasmid prefix if present
+          last;
+        }
+      }
+    }
+  }
+
+  return ($type, $name);
+}
+
+
+=head2 load_sequence
+
+  Arg [1]    : Bio::EnsEMBL::DBAdaptor $db
+  Arg [2]    : Bio::SeqI $seq
+  Example    : load_sequence
+  Description: Loads the sequence for this organism into the database, creating
+               Coordinate system and seq_region as necessary
+  Returntype : none
+  Exceptions : none
+  Caller     : general
+
+=cut
+
+sub load_sequence {
+  my $db = shift;
+  my $seq = shift;
+
+  my ($csname, $seq_region) = get_seq_region_data($seq);
+
+  my $csa = $db->get_CoordSystemAdaptor();
+  my $cs = $csa->fetch_by_name($csname);
+
+  if(!$cs) {
+    $cs = Bio::EnsEMBL::CoordSystem->new(-NAME           => $csname,
+                                          -SEQUENCE_LEVEL => 1,
+                                          -RANK           => 1,
+                                          -DEFAULT        => 1);
+    $csa->store($cs);
+  }
+
+  my $slice = Bio::EnsEMBL::Slice->new(-SEQ_REGION_NAME => $seq_region,
+                                       -COORD_SYSTEM    => $cs,
+                                       -START           => 1,
+                                       -END             => $seq->length(),
+                                       -SEQ_REGION_LENGTH => $seq->length());
+
+
+  my $slice_adaptor = $db->get_SliceAdaptor();
+
+  my $sequence = $seq->seq();
+  $slice_adaptor->store($slice, \$sequence);
+
+  return $slice;
+}
+