decided to put this stuff in pipeline

3841d1bd · Laura Clarke · 69ee1ecc · 69ee1ecc · 69ee1ecc · 69ee1ecc
Commit 3841d1bd authored 22 years ago by Laura Clarke
--- a/scripts/DataConversion/wormbase/elegans_agp_parse.pl
+++ b/scripts/DataConversion/wormbase/elegans_agp_parse.pl
-#!/usr/local/ensembl/bin/perl -w
-
-# mouse agp -> ensembl sgp
-
-$| = 1;
-
-#print STDERR "have entered script\n";
-my $agp = shift or die;
-my $raw = shift or die;
-my $chromosome = shift or die;
-
-my @sgp;
-my %chr_id;
-my %ctg;
-
-open RAW, "< $raw" or die "couldn't open file ".$raw." @!";
-open AGP, "< $agp" or die;
-open CHR, "< $chromosome" or die;
-
-while (<RAW>) { 
-  chomp;
-  #print;
-  #print "\n";
-  my ($contig_id, $iid, $length) = split;
-  #print "have ".$contig_id." ".$iid." ".$length."\n";
-  #AL031185.2.1.4059
-  #my ($id) = $contig_id =~ /(\w+\.\d+)\.\d+\.\d+/;   # strip off leading 'sc<date>_'
-  $ctg{$contig_id} = [ $iid, $length ];
-}
-
-while(<CHR>){
-  chomp;
-  my ($id, $name) = split;
-  #print "have ".$id." name ".$name."\n";
-  $chr_id{$name} = $id; 
-}
-
-while (<AGP>) {
-  
-  chomp;
-  #I	47490	107680	3	F	AC024796.1	1	60191	+
-  #print;
-  #print "\n";
-  my ($chr, $chr_start, $chr_end, $gap,  $contig, $raw_start, $raw_end, $raw_ori) =
-    (split)[0, 1, 2, 4, 5, 6, 7, 8];
-  #next if $raw_start eq 'fragment';
-  #next if $raw_start eq 'clone';
-  #next if $raw_start eq 'contig';
-  if($gap eq 'N'){
-    next;
-  }
-  if($contig eq '.'){
-    next;
-  }
-  #$chr =~ s/chr//;
-  
-  #next if $chr eq 'NA_contam';
-  #print $chr." start ".$chr_start." end ".$chr_end." contig ".$contig." start ".$raw_start." end ".$raw_end." ori ".$raw_ori."\n";
-  if ($raw_ori eq '+') {
-    $raw_ori = 1;
-  }
-  elsif ($raw_ori eq '-') {
-    $raw_ori = -1;
-  }
-  else {
-    $raw_ori = 1;
-    #print "$chr Contig  $contig  $chr_start \n";
-    #print "Warning assumed default orientation for $contig\n";
-  }
-  my $chromosome_id = $chr_id{$chr};
-  my $ctg_id    = $ctg{$contig}->[0];
-  die "Don't have raw contig for $contig\n" unless $ctg_id;
-  #print $contig."\n" if(!$ctg_id);
-  #my $ctg_start = $ctg{$contig}->[1];
-  my $ctg_end   = $ctg{$contig}->[1];
-  if($raw_end > $ctg_end){
-    next;
-    #warn "contig ".$contig." length ".$ctg_end." is less than agp length ".$raw_end." won't work\n";
-  }
-  #if ($raw_start != $ctg_start || $raw_start != $ctg_start) {
-  # raw coords from AGP assumed to be from 1 to length of contig
-  #	print "Warning: my assumption about contig coords for $contig is wrong\n";
-  #    }
-  
-  # $sgp[0] = qq{};
-  $sgp[0] = $chromosome_id;   # fpc = contig id
-  $sgp[1] = $chr_start;
-  $sgp[2] = $chr_end;
-  $sgp[3] = $chr;
-  $sgp[4] = $chr_start;
-  $sgp[5] = $chr_end;   # fpc start
-  $sgp[6] = 1;   # fpc end
-  $sgp[7] = $ctg_id;          # fpc ori
-  $sgp[8] = $raw_start;   # raw start
-  $sgp[9] = $raw_end;   # raw end
-  $sgp[10] = $raw_ori;
-  $sgp[11] = qq{elegans_90};
-  
-  print join("\t", @sgp), "\n";
-}
-
-close RAW;
-close AGP;
-close CHR;
--- a/scripts/DataConversion/wormbase/loadscaffold_elegans.pl
+++ b/scripts/DataConversion/wormbase/loadscaffold_elegans.pl
-#!/usr/local/bin/perl -w
-
-use strict;
-use vars qw($USER $PASS $DB $HOST $DSN);
-use Bio::EnsEMBL::DBLoader;
-use Bio::SeqIO;
-use Bio::EnsEMBL::RawContig;
-use Bio::EnsEMBL::Clone;
-use Getopt::Long;
-
-my $dbtype = 'rdb';
-my $host   = 'ecs1d';
-my $port   = '';
-my $dbname = 'elegans_newschema';
-my $dbuser = 'ecs1dadmin';
-my $dbpass = 'TyhRv';
-my $module = 'Bio::EnsEMBL::DBSQL::DBAdaptor';
-$| = 1;
-my $help = 0;
-my $pipe = 1;
-my $verbose = 0;
-
-&GetOptions( 
-	     'dbtype:s'   => \$dbtype,
-	     'host:s'     => \$host,
-	     'port:n'     => \$port,
-	     'dbname:s'   => \$dbname,
-	     'dbuser:s'   => \$dbuser,
-	     'dbpass:s'   => \$dbpass,
-	     'module:s'   => \$module,
-             'pipe'       => \$pipe,
-	     'verbose'    => \$verbose,
-	     'h|help'     => \$help
-	     );
-if ($help) {
-    exec('perldoc', $0);
-}
-
-
-$SIG{INT} = sub {my $sig=shift;die "exited after SIG$sig";};
-my $locator;
-if ($dbpass) {
-   $locator = "$module/host=$host;port=;dbname=$dbname;user=$dbuser;pass=$dbpass";
-} else {
-   $locator = "$module/host=$host;port=;dbname=$dbname;user=$dbuser;pass=";
-}
-my $db =  Bio::EnsEMBL::DBLoader->new($locator);
-
-my ($seqfile) = shift;
-my ($contig_id_file) = shift;
-
-open(FH, $contig_id_file);
-
-my %seq_got;
-
-if( !defined $seqfile ) { die 'cannot load because sequence file to load sequences from was not passed in as argument';}
-
-my $seqio = new Bio::SeqIO(-format => 'Fasta', 
-			   -file => $seqfile);
-my $count = 0;
-
-while( my $seq = $seqio->next_seq ) {
-  my @clone_ids = split / /, $seq->desc;
-  #print $seq->id."\n";
-  #print $seq->desc."\n";
-  if($seq->seq =~ /match/i){
-    die("seq = ".$seq->seq."\n");
-  }
-  if($seq->seq =~ /abort/i){
-    die("seq = ".$seq->seq."\n");
-  }  
-  my $clone     = new Bio::EnsEMBL::Clone;
-  my $contig    = new Bio::EnsEMBL::RawContig;
-  my ($version) = $clone_ids[0] =~ /\S+\.(\d+)/;
-  $seq_got{$clone_ids[0]} = 1;
-  $clone->htg_phase(4);
-  $clone->id($clone_ids[0]); 
-  $clone->embl_id($clone_ids[0]);
-  $clone->version(1);
-  $clone->embl_version($version);
-  $clone->adaptor($db->get_CloneAdaptor);
-  my $contig_id = $clone_ids[0].".1.".$seq->length;
-  $contig->name($contig_id);
-  $contig->dbID($count++);
-  $contig->seq($seq->seq);
-  $contig->length($seq->length);
-  $contig->adaptor($db->get_RawContigAdaptor);
-  $clone->add_Contig($contig);
-  my $time = time;
-  $clone->created($time);
-  $clone->modified($time);
-  #print "have clone with ".$clone->id." contig ".$contig->id."\n";
-  eval { 
-    $db->get_CloneAdaptor->store($clone);
-    $verbose && print STDERR "Written ".$clone->id." scaffold into db\n";
-  };
-  if( $@ ) {
-    print STDERR "Could not write clone into database, error was $@\n";
-  }
-}
-
-
-while(<FH>){
-  chomp;
-  my $name = $_;
-  if(!$seq_got{$name}){
-    print "don't seem to have sequence for ".$name."\n";
-  }
-}
-
-close(FH);
--- a/scripts/DataConversion/wormbase/translation_check.pl
+++ b/scripts/DataConversion/wormbase/translation_check.pl
-#!/usr/local/ensembl/bin/perl -w
-
-use strict;
-use Bio::EnsEMBL::DBSQL::DBAdaptor;
-
-
-$| = 1;
-
-my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host => "ecs1d",
-					    -user => "ecs1dadmin",
-					    -dbname => "elegans_newschema",
-					    -pass  => "TyhRv",
-					   );
-
-my %chromosome;
-my $sql = "select name, length from chromosome";
-my $sth = $db->prepare($sql);
-$sth->execute;
-
-my $slice_adaptor = $db->get_SliceAdaptor();
-
-while(my ($name, $length) = $sth->fetchrow){
-  my $slice = $slice_adaptor->fetch_by_chr_start_end($name, 1, $length);
-  $chromosome{$name} = $slice;
-} 
-
-
-foreach my $name(keys(%chromosome)){
-
-my @genes;
-
-my $slice = $chromosome{$name};
-#$sql = 'select gene_id from gene';
-
-#$sth = $db->prepare($sql);
-
-#$sth->execute;
-
-#while(my ($gene_id) = $sth->fetchrow){
-#  my $gene = $db->get_GeneAdaptor->fetch_by_dbID($gene_id);
-#  push(@genes, $gene);
-  
-#}
-
-@genes = @{$slice->get_all_Genes};
-#print STDERR "there are ".@genes." genes\n";
-
-
-
-my $sticky_count = 0;
-my $non_translate = 0;
-foreach my $gene(@genes){
-
-  #$sticky_count += &sticky_check($gene);
-  $non_translate += &translation_check($gene);
-  #my ($transcript) = @{$gene->get_all_Transcripts};
-  #if($translate){
-  #  my @exons = @{$transcript->get_all_Exons};
-    #&display_exons(@exons);
-    #&non_translate($transcript);
-  #}
-}
-
-#print "there are ".$sticky_count." transcripts containing sticky_exons\n";
-print "there are ".$non_translate." non translating transcripts in ".$name."\n";
-
-}
-
-
-
-sub sticky_check{
-  my ($gene) = @_;
-  
-  my $count = 0;
-  my $sticky = 0;
-  my @transcripts = @{$gene->get_all_Transcripts};
-  foreach my $t(@transcripts){
-    foreach my $e(@{$t->get_all_Exons}){
-      if($e->isa("Bio::EnsEMBL::StickyExon")){
-	$sticky = 1;
-      }
-    }
-    if($sticky){
-      $count++;
-    }
-    $sticky = 0;
-  }
-  return $count;
-}
-sub translation_check{
-  my ($gene) = @_;
-
-  my $count = 0;
-  my @transcripts = @{$gene->get_all_Transcripts};
-  foreach my $t(@transcripts){
-    my $pep = $t->translate->seq;
-    if($pep =~ /\*/){
-      print STDERR "\ntranscript ".$t->stable_id." doesn't translate: \n\n";
-      $count++;
-    }
-  }
-  return $count;
-}
-
-sub display_exons{
-  my (@exons) = @_;
-
-  @exons = sort{$a->start <=> $b->start || $a->end <=> $b->end} @exons;
-
-  if($exons[0]->can ('hseqname')){
-    foreach my $e(@exons){
-      print $e->hseqname."\t ".$e->start."\t ".$e->end."\t ".$e->strand."\t ".$e->phase."\t ".$e->end_phase."\n";
-    }
-  }else{
-     foreach my $e(@exons){
-      print $e->seqname."\t ".$e->start."\t ".$e->end."\t ".$e->strand."\t ".$e->phase."\t ".$e->end_phase."\n";
-    }
-  }
-}
-
-sub non_translate{
-  my (@transcripts) = @_;
-  
-  foreach my $t(@transcripts){
-    
-    my @exons = @{$t->get_all_Exons};
-#    print "transcript sequence :\n".$t->seq."\n";
-    foreach my $e(@exons){
-      my $seq = $e->seq;
-      my $pep0 = $seq->translate('*', 'X', 0);
-      my $pep1 = $seq->translate('*', 'X', 1);
-      my $pep2 = $seq->translate('*', 'X', 2);
-      print "exon sequence :\n".$e->seq->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 0 frame\n ".$pep0->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 1 phase\n ".$pep2->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 2 phase\n ".$pep1->seq."\n\n";
-      print "\n\n";
-      
-    }
-    
-  }
-}
--- a/scripts/DataConversion/wormbase/wormbase_gff_to_ensembl.pl
+++ b/scripts/DataConversion/wormbase/wormbase_gff_to_ensembl.pl
-#!/usr/local/ensembl/bin/perl -w
-
-use strict;
-use Bio::EnsEMBL::DBSQL::DBAdaptor;
-use Bio::EnsEMBL::Transcript;
-use Bio::EnsEMBL::Gene;
-use Bio::EnsEMBL::Translation;
-use Bio::EnsEMBL::Exon;
-
-$| = 1;
-
-my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host => "ecs1d",
-					    -user => "ecs1dadmin",
-					    -dbname => "elegans_newschema",
-					    -pass  => "TyhRv",
-					   );
-#CHROMOSOME_I	curated CDS	11641	11689	.	+	0	Sequence "Y74C9A.2"
-#CHROMOSOME_I	curated CDS	14951	15160	.	+	2	Sequence "Y74C9A.2"
-#CHROMOSOME_I	curated CDS	16473	16585	.	+	2	Sequence "Y74C9A.2"
-#CHROMOSOME_I	curated CDS	43733	43961	.	+	0	Sequence "Y74C9A.1"
-#CHROMOSOME_I	curated CDS	44030	44234	.	+	2	Sequence "Y74C9A.1"
-#CHROMOSOME_I	curated CDS	44281	44328	.	+	1	Sequence "Y74C9A.1"
-#CHROMOSOME_I	curated CDS	44521	44677	.	+	1	Sequence "Y74C9A.1"
-#CHROMOSOME_I	curated CDS	49921	50016	.	+	0	Sequence "Y48G1C.4"
-#CHROMOSOME_I	curated CDS	50815	51030	.	+	0	Sequence "Y48G1C.4"
-#CHROMOSOME_I	curated CDS	52283	52410	.	+	0	Sequence "Y48G1C.4"
-#CHROMOSOME_I	curated CDS	52466	52572	.	+	1	Sequence "Y48G1C.4"
-
-
-my %chromosome;
-my $sql = "select name, length from chromosome";
-my $sth = $db->prepare($sql);
-$sth->execute;
-
-my $slice_adaptor = $db->get_SliceAdaptor();
-
-while(my ($name, $length) = $sth->fetchrow){
-  my $slice = $slice_adaptor->fetch_by_chr_start_end($name, 1, $length);
-  $chromosome{$name} = $slice;
-} 
-
-
-
-my $analysis_adaptor = $db->get_AnalysisAdaptor();
-my $analysis = $analysis_adaptor->fetch_by_logic_name('wormbase');
-my $forward_count = 0;
-my $reverse_count = 0;
-my $count = 0;
-
-
-my %genes;
-my @stored;
-
-my $file = shift;
-open(FH, $file) or die "couldn't open $file $!";
-
-my $transcripts = &process_file(\*FH);
-my $processed_transcripts = &process_transcripts($transcripts, $analysis);
-
-#print "there are ".keys(%$processed_transcripts)." transcript\n";
-
-my $genes = &create_transcripts($processed_transcripts);
-
-#print "there are ".keys(%$genes)." genes\n";
-
-
-my $gene_adaptor = $db->get_GeneAdaptor;
-my @genes;
-foreach my $gene_id(keys(%$genes)){
-  my $db_id;
-  my $transcripts = $genes->{$gene_id};
-  my $unpruned = &create_gene($transcripts, $gene_id);
-##  print STDERR "gene ".$unpruned."\n";
-  my $gene = &prune_Exons($unpruned);
-  push(@genes, $gene);
-  #eval{
-  #  $gene->transform;
-  #};
-  foreach my $transcript (@{$gene->get_all_Transcripts}){
-    print "transcript ".$transcript->stable_id."\n";
-    my @exons = @{$transcript->get_all_Exons};
-    if($exons[0]->strand == -1){
-      @exons = sort{$b->start <=> $a->start} @exons;
-    }else{
-      @exons = sort{$a->start <=> $b->start} @exons;
-    }
-    foreach my $exon(@exons){
-      print "exon ".$exon->stable_id."\t".$exon->seqname."\t ".$exon->start."\t ".$exon->end."\t ".$exon->strand."\t ".$exon->phase."\t ".$exon->end_phase."\n";
-    }
-    
-  }
-
-#  if($@){
-#    warn "couldn't transform ".$gene." $@\n";
-#    next;
-#  }
-#  eval{
-#    #$gene_adaptor->store($gene);
-#  };
-#  if($@){
-#    warn "couldn't store ".$gene."\n";
-#    next;
-#  }
-#  $db_id = $gene->dbID;
-#  print STDERR "have stored gene ".$gene->dbID."\n";
-#  #my $stored_gene = $gene_adaptor->fetch_by_dbID($db_id);
-#  #&translation_check($stored_gene);
-}
- 
-print "there are ".@genes." genes in wormbase 90\n";
-
-sub process_file{
-  my ($fh) = @_;
-  
-  my %transcripts;
- LOOP: while(<$fh>){
-#    print;
-    chomp;
-    my($chr, $status, $type, $start, $end, $score, $strand, $frame, $sequence, $gene) = split;
-    my $element = $_;
-    if(!$status && !$type){
-      #print "status and type no defined skipping\n";
-      next LOOP;
-    }
-    my $line = $status." ".$type;
-#    print "line ".$line."\n";
-    if($line ne 'curated CDS'){
-      next LOOP;
-    }
-    $gene =~ s/\"//g;
-    if(!$transcripts{$gene}){
-      $transcripts{$gene} = [];
-      push(@{$transcripts{$gene}}, $element);
-    }else{
-      push(@{$transcripts{$gene}}, $element);
-    }
-    
-  }
-  return \%transcripts;
-}
-
-
-sub process_transcripts{
-  my ($transcripts, $analysis) = @_;
-  
-  my %genes;
-  my %transcripts = %$transcripts;
-  my @names = keys(%transcripts);
-  #print STDERR "PROCESSING TRANSCRIPTS \n";
-  foreach my $name(@names){
-    my @lines = @{$transcripts{$name}};
-    $transcripts{$name} = [];
-    my @exons;
-    foreach my $line(@lines){
-      my($chr, $status, $type, $start, $end, $score, $strand, $frame, $sequence, $gene) = split /\s+/, $line;
-      $chr =~ s/CHROMOSOME_//;
-      if($start == $end){
-	next;
-      }
-      my $slice = $chromosome{$chr};
-      my $exon = new Bio::EnsEMBL::Exon;
-      my $phase = (3 - $frame)%3;
-      #print "have ".$phase." phase\n";
-      $exon->start($start);
-      $exon->end($end);
-      $exon->analysis($analysis);
-      $exon->contig($slice);
-      $exon->phase($phase);
-      my $end_phase = ($phase + ($exon->end-$exon->start) + 1)%3;
-      $exon->end_phase($end_phase);
-      if($strand eq '+'){
-	$exon->strand(1);
-      }else{
-	$exon->strand(-1);
-      }
-      $exon->score(100);
-      push(@exons, $exon);
-    }
-    if($exons[0]->strand == -1){
-      @exons = sort{$b->start <=> $a->start} @exons;
-    }else{
-      @exons = sort{$a->start <=> $b->start} @exons;
-    }
-    #print STDERR $name." transcript\n";
-    my $phase = 0;
-    foreach my $e(@exons){
-      #print "phase ".$phase."\n";
-      #$e->phase($phase);
-      #my $end_phase = ($phase + ($e->end-$e->start) + 1)%3;
-      #$e->end_phase($end_phase);
-      #print "end phase ".$end_phase."\n";
-      #$phase = $end_phase;
-      push(@{$transcripts{$name}}, $e);
-    }
-  }
-  #print STDERR "FINISHED PROCESSING TRANSCRIPTS\n";
-  return \%transcripts;
-
-}
-
-
-
-
-
-sub create_transcripts{
-  my ($transcripts) = @_;
-  #print STDERR "CREATING TRANSCRIPTS\n";
-  my %transcripts = %$transcripts;
-  my @non_translate;
-  my %genes;
-  my $gene_name;
-  my $transcript_id;
-  foreach my $transcript(keys(%transcripts)){
-    my $time = time;
-    #print STDERR "transcript ".$transcript." \n";
-    my @exons = @{$transcripts{$transcript}};
-    if($transcript =~ /\w+\.\d+\w+/){
-     ($gene_name) = $transcript =~ /(\w+\.\d+)\w+/;
-     $transcript_id = $transcript;
-    }else{
-      $gene_name = $transcript;
-      $transcript_id = $transcript;
-    }
-  
-    my $transcript = new Bio::EnsEMBL::Transcript;
-    my $translation = new Bio::EnsEMBL::Translation;
-    my @sorted_exons;
-    if($exons[0]->strand == 1){
-      @sorted_exons = sort{$a->start <=> $b->start} @exons
-    }else{
-      @sorted_exons = sort{$b->start <=> $a->start} @exons  
-    }
-    my $exon_count = 1;
-    my $phase = 0;
-    foreach my $exon(@sorted_exons){
-      $exon->created($time);
-      $exon->modified($time);
-      $exon->version(1);
-      $exon->stable_id($transcript_id.".".$exon_count);
-      #$exon->phase($phase);
-      #my $end_phase = ($phase + ($end-$start) + 1)%3; 
-      #$exon->end_phase($end_phase);
-      #$phase = $end_phase;
-      $exon_count++;
-      $transcript->add_Exon($exon);
-    }
-    $translation->start_Exon($sorted_exons[0]);
-    $translation->end_Exon  ($sorted_exons[$#sorted_exons]);
- 
-    if ($sorted_exons[0]->phase == 0) {
-      $translation->start(1);
-    } elsif ($sorted_exons[0]->phase == 1) {
-      $translation->start(3);
-    } elsif ($sorted_exons[0]->phase == 2) {
-      $translation->start(2);
-    }
-    $translation->end  ($sorted_exons[$#sorted_exons]->end - $sorted_exons[$#sorted_exons]->start + 1);
-#    $translation->version(1);
-    $translation->stable_id($transcript_id);
-    $transcript->translation($translation);
-    $transcript->version(1);
-    $transcript->stable_id($transcript_id);
-   # my $mrna = $transcript->translateable_seq;
-#    my $last_codon = substr($mrna, -3);
-#    if($last_codon eq 'TAG' || $last_codon eq 'TGA' || $last_codon eq 'TAA'){
-#      my $translation_end =   ($sorted_exons[$#sorted_exons]->end - $sorted_exons[$#sorted_exons]->start + 1)-3;
-#      if($translation_end <= 0){
-#	my $exon_number = $#sorted_exons
-#      }
-#      $translation->end  (($sorted_exons[$#sorted_exons]->end - $sorted_exons[$#sorted_exons]->start + 1)-3); 
-#    }
-    my $peptide = $transcript->translate->seq;
-    if($peptide =~ /\*./){
-     # warn "transcript ".$transcript->stable_id." doesn't translate and won't be used\n";
-     # push(@non_translate, $transcript);
-     # next;
-    }
-    #print STDERR "transcript ".$transcript->stable_id." has ".@{$transcript->get_all_Exons}." exons and produces and translation: ".$peptide."\n";
-    my @stored_exons = @{$transcript->get_all_Exons};
-    #&display_exons(@stored_exons);
-    if(!$genes{$gene_name}){
-      $genes{$gene_name} = [];
-      push(@{$genes{$gene_name}}, $transcript);
-    }else{
-      push(@{$genes{$gene_name}}, $transcript);
-    }
-  }
-
-  foreach my $t(@non_translate){
-    print STDERR "transcript ".$t->stable_id." has ".@{$t->get_all_Exons}." exons\n";
-    my @exons = @{$t->get_all_Exons};
-    &display_exons(@exons);
-    &non_translate($t);
-  }
-  #print STDERR "FINISHED CREATEING TRANSCRIPTS\n";
-  return \%genes;
-
-}
-
-
-sub create_gene{
-  my ($transcripts, $name) = @_;
-  #print STDERR "CREATING GENE\n";
-  my $time = time;
-  my $gene = new Bio::EnsEMBL::Gene; 
-  my $exons = $transcripts->[0]->get_all_Exons;
-  my $analysis = $exons->[0]->analysis;
-  $gene->analysis($analysis);
-  $gene->type($analysis->logic_name);
-  $gene->created($time);
-  $gene->modified($time);
-  $gene->version(1);
-  $gene->stable_id($name);
-  foreach my $transcript(@$transcripts){
-    $gene->add_Transcript($transcript);
-  }
-  
-  return $gene;
-}
-
-
-sub prune_Exons {
-  my ($gene) = @_;
-  
-  my @unique_Exons; 
-  
-  # keep track of all unique exons found so far to avoid making duplicates
-  # need to be very careful about translation->start_Exon and translation->end_Exon
-  
-  foreach my $tran (@{$gene->get_all_Transcripts}) {
-    my @newexons;
-    foreach my $exon (@{$tran->get_all_Exons}) {
-      my $found;
-      #always empty
-    UNI:foreach my $uni (@unique_Exons) {
-	if ($uni->start  == $exon->start  &&
-	    $uni->end    == $exon->end    &&
-	    $uni->strand == $exon->strand &&
-	    $uni->phase  == $exon->phase  &&
-	    $uni->end_phase == $exon->end_phase
-	   ) {
-	  $found = $uni;
-	  last UNI;
-	}
-      }
-      if (defined($found)) {
-	push(@newexons,$found);
-	if ($exon == $tran->translation->start_Exon){
-	  $tran->translation->start_Exon($found);
-	}
-	if ($exon == $tran->translation->end_Exon){
-	  $tran->translation->end_Exon($found);
-	}
-      } else {
-	push(@newexons,$exon);
-	push(@unique_Exons, $exon);
-      }
-    }          
-    $tran->flush_Exon;
-    foreach my $exon (@newexons) {
-      $tran->add_Exon($exon);
-    }
-  }
-  return $gene;
-}
-
-sub translation_check{
-  my ($gene) = @_;
-
-  my @transcripts = @{$gene->get_all_Transcripts};
-  foreach my $t(@transcripts){
-    my $pep = $t->translate->seq;
-    if($pep =~ /\*/){
-      print STDERR "transcript ".$t->stable_id." doesn't translate\n";
-    }
-  }
-  
-}
-
-
-sub display_exons{
-  my (@exons) = @_;
-
-  @exons = sort{$a->start <=> $b->start || $a->end <=> $b->end} @exons;
-
-  if($exons[0]->can ('hseqname')){
-    foreach my $e(@exons){
-      print $e->hseqname."\t ".$e->start."\t ".$e->end."\t ".$e->strand."\t ".$e->phase."\t ".$e->end_phase."\n";
-    }
-  }else{
-     foreach my $e(@exons){
-      print $e->seqname."\t ".$e->start."\t ".$e->end."\t ".$e->strand."\t ".$e->phase."\t ".$e->end_phase."\n";
-    }
-  }
-}
-
-sub non_translate{
-  my (@transcripts) = @_;
-  
-  foreach my $t(@transcripts){
-    
-    my @exons = @{$t->get_all_Exons};
-#    print "transcript sequence :\n".$t->seq."\n";
-    foreach my $e(@exons){
-      my $seq = $e->seq;
-      my $pep0 = $seq->translate('*', 'X', 0);
-      my $pep1 = $seq->translate('*', 'X', 1);
-      my $pep2 = $seq->translate('*', 'X', 2);
-      print "exon sequence :\n".$e->seq->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 0 frame\n ".$pep0->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 1 phase\n ".$pep2->seq."\n\n";
-      print $e->seqname." ".$e->start." : ".$e->end." translation in 2 phase\n ".$pep1->seq."\n\n";
-      print "\n\n";
-      
-    }
-    
-  }
-}