Skip to content
Snippets Groups Projects
Commit d49074d9 authored by Graham McVicker's avatar Graham McVicker
Browse files

moved routines related to storing of final genes into Gene package, added...

moved routines related to storing of final genes into Gene package, added splitting of v. long introns, clustering of transcripts into genes, removal of duplicate transcripts and brinnging   across xrefs
parent d49c7a3c
No related branches found
No related tags found
No related merge requests found
use strict;
use warnings;
#
# Utility methods for the storing of chimp genes
#
package Gene;
use Bio::EnsEMBL::Utils::Exception qw(info throw);
use constant NEAR => 1.5e6;
our %KEEP_XREF = ('SWISSPROT' => 1,
'SPTREMBL' => 1,
'HUGO' => 1);
###############################################################################
# store gene
#
# Builds Ensembl genes from the generated chimp transcripts and stores them
# in the database.
#
###############################################################################
sub store_gene {
my $db = shift;
my $hum_gene = shift; # human gene
my $ctranscripts = shift; # chimp transcripts
my $MIN_AA_LEN = 15;
my $MIN_NT_LEN = 600;
my $analysis = $db->get_AnalysisAdaptor->fetch_by_logic_name('ensembl');
# Look at the translations and convert any transcripts with stop codons
# into pseudogenes
foreach my $ct (@$ctranscripts) {
if($ct->translation && $ct->translate->seq() =~ /\*/) {
$ct->translation(undef);
}
}
# transfer xrefs from human transcripts/translations
transfer_xrefs($hum_gene, $ctranscripts);
# compact duplicate transcripts into the same transcripts
$ctranscripts = compact_transcripts($ctranscripts);
# group all close together transcripts on the same strand into clusters
my $clusters = cluster_transcripts($ctranscripts);
my $gene_adaptor = $db->get_GeneAdaptor();
foreach my $cluster (@$clusters) {
# keep genes only if there is a minimum amount of nucleotide
# OR amino acid sequence in transcripts in the same region
if($cluster->{'nt_len'} < $MIN_NT_LEN &&
$cluster->{'aa_len'} < $MIN_AA_LEN) {
next;
}
# one gene for each cluster
my $cgene = Bio::EnsEMBL::Gene->new();
generate_stable_id($cgene);
# rename transcripts and add to gene
foreach my $ctrans (@{$cluster->{'transcripts'}}) {
generate_stable_id($ctrans);
# rename translation
if($ctrans->translation) {
generate_stable_id($ctrans->translation);
}
$cgene->add_Transcript($ctrans);
}
# rename all of the exons
# but watch out because duplicate exons will be merged and we do not
# want to generate multiple names
my %ex_stable_ids;
foreach my $ex (@{$cgene->get_all_Exons()}) {
if($ex_stable_ids{$ex->hashkey()}) {
$ex->stable_id($ex_stable_ids{$ex->hashkey()});
} else {
generate_stable_id($ex);
$ex_stable_ids{$ex->hashkey()} = $ex->stable_id();
}
}
foreach my $gx (@{$hum_gene->get_all_DBEntries}) {
$cgene->add_DBEntry($gx) if($KEEP_XREF{uc($gx->dbname())});
}
if($hum_gene->display_xref &&
$KEEP_XREF{uc($hum_gene->display_xref->dbname)}){
$cgene->display_xref($hum_gene->display_xref);
}
# set the analysis on the gene object
$cgene->analysis($analysis);
my $name = $cgene->stable_id();
$name .= '/'.$cgene->display_xref->display_id() if($cgene->display_xref());
$cgene->type('ensembl');
# store the bloody thing
print STDERR "Storing gene: $name\n";
$gene_adaptor->store($cgene);
}
return;
}
###############################################################################
# generate_stable_id
#
# Generates a stable_id for a gene, transcript, translation or exon and sets
# it on the object.
#
###############################################################################
my ($TRANSCRIPT_NUM, $GENE_NUM, $EXON_NUM, $TRANSLATION_NUM);
sub generate_stable_id {
my $object = shift;
my $SPECIES_PREFIX = 'PTR';
my $PAD = 18;
my $type_prefix;
my $num;
if($object->isa('Bio::EnsEMBL::Exon')) {
$type_prefix = 'E';
$EXON_NUM ||= 0;
$num = ++$EXON_NUM;
} elsif($object->isa('Bio::EnsEMBL::Transcript')) {
$type_prefix = 'T';
$TRANSCRIPT_NUM ||= 0;
$num = ++$TRANSCRIPT_NUM;
} elsif($object->isa('Bio::EnsEMBL::Gene')) {
$type_prefix = 'G';
$GENE_NUM ||= 0;
$num = ++$GENE_NUM;
} elsif($object->isa('Bio::EnsEMBL::Translation')) {
$type_prefix = 'P';
$TRANSLATION_NUM ||= 0;
$num = ++$TRANSLATION_NUM;
} else {
throw('Unknown object type '.ref($object).'. Cannot create stable_id.');
}
my $prefix = "ENS${SPECIES_PREFIX}${type_prefix}";
my $pad = $PAD - length($prefix) - length($num);
$object->version(1);
$object->stable_id($prefix . ('0'x$pad) . $num);
}
###############################################################################
# compact_transcripts
#
# Given a set of transcripts this function removes duplicate transcripts and
# returns the unique set
#
###############################################################################
sub compact_transcripts {
my $transcripts = shift;
my %unique_hash;
my @unique_list;
info("Compacting transcripts");
foreach my $transcript (@$transcripts) {
my $hashkey = '';
foreach my $exon (@{$transcript->get_all_Exons}) {
$hashkey .= '('.$exon->hashkey.')';
}
$unique_hash{$hashkey} ||= [];
push @{$unique_hash{$hashkey}}, $transcript;
}
# merge xrefs from duplicated transcripts
foreach my $key (keys %unique_hash) {
# choose one of the duplicates arbitrarily
my $duplicates = $unique_hash{$key};
my $transcript = pop(@$duplicates);
# merge all of the xrefs and add them to the one transcript that will
# be kept
merge_xrefs($transcript, $duplicates);
push @unique_list, $transcript;
}
return \@unique_list;
}
sub merge_xrefs {
my $kept_transcript = shift;
my $duplicates = shift;
return if(!@$duplicates);
info('Merging xrefs from duplicate transcripts');
# construct a set of xrefs the transcript to keep already has
my %existing_tl_xrefs;
my %existing_tr_xrefs;
foreach my $xref (@{$kept_transcript->get_all_DBEntries()}) {
$existing_tr_xrefs{$xref->dbname().':'.$xref->primary_id()} = 1;
}
if($kept_transcript->translation()) {
foreach my $xref (@{$kept_transcript->translation->get_all_DBEntries()}) {
$existing_tl_xrefs{$xref->dbname().':'.$xref->primary_id()} = 1;
}
}
# collect a list of additional xrefs which should be kept
my %tl_xrefs;
my %tr_xrefs;
foreach my $dup (@$duplicates) {
foreach my $xref (@{$dup->get_all_DBEntries()}) {
my $key = $xref->dbname().':'.$xref->primary_id();
$tr_xrefs{$key} = $xref if(!$existing_tr_xrefs{$key});
}
if($dup->translation()) {
foreach my $xref (@{$dup->translation->get_all_DBEntries()}) {
my $key = $xref->dbname().':'.$xref->primary_id();
$tl_xrefs{$key} = $xref if(!$existing_tl_xrefs{$key});
}
}
}
# add any new xrefs which were found to the kept translation
foreach my $xref (values %tr_xrefs) {
$kept_transcript->add_DBEntry($xref);
}
my @new_tl_xrefs = values(%tl_xrefs);
my $tl = $kept_transcript->translation();
if(@new_tl_xrefs && !$tl) {
throw("Some duplicate transcripts have translations, and others do not?");
}
foreach my $xref (@new_tl_xrefs) {
$tl->add_DBEntry($xref);
}
return;
}
###############################################################################
# cluster_transcripts
#
# Given a set of transcripts this function will cluster them into groups
# which are near to each other. Note this may be more efficient to use
# a sorted list, but for the small size of the clusters it should not matter.
#
##############################################################################
sub cluster_transcripts {
my $transcripts = shift;
my @clusters;
info("Clustering transcripts");
foreach my $tr (@$transcripts) {
my $cl = undef;
foreach my $c (@clusters) {
if(is_near($tr->start(),$tr->end(),$tr->strand(),
$c->{'start'}, $c->{'end'}, $c->{'strand'})) {
$cl = $c;
}
}
if($cl) {
push @{$cl->{'transcripts'}}, $tr;
$cl->{'end'} =
( $tr->end > $cl->{'end'} ) ? $tr->{'end'} : $cl->{'end'};
$cl->{'start'} =
( $tr->start < $cl->{'start'} ) ? $tr->{'start'} : $cl->{'start'};
} else {
$cl = {'start' => $tr->start(),
'end' => $tr->end(),
'strand' => $tr->strand(),
'transcripts' => [$tr]};
push @clusters, $cl;
}
}
# now cluster clusters
for(my $i = 0; $i < @clusters; $i++) {
for(my $j = $i+1; $j < @clusters; $j++) {
my $c1 = $clusters[$i];
my $c2 = $clusters[$j];
if(is_near($c1->{'start'}, $c1->{'end'}, $c1->{'strand'},
$c2->{'start'}, $c2->{'end'}, $c2->{'strand'})) {
# merge clusters and take one of them out of the list
splice(@clusters, $j, 1);
$c1->{'start'} =
($c1->{'start'} < $c2->{'start'}) ? $c1->{'start'} : $c2->{'start'};
$c1->{'end'} =
($c1->{'end'} > $c2->{'end'}) ? $c1->{'end'} : $c2->{'end'};
push @{$c1->{'transcripts'}}, @{$c2->{'transcripts'}};
# start over again
$i = -1;
$j = -1;
}
}
}
# sum the nucleotide length and amino acid lengths of each of the
# transcripts in a given cluster
foreach my $cl (@clusters) {
$cl->{'nt_len'} = 0;
$cl->{'aa_len'} = 0;
foreach my $tr (@{$cl->{'transcripts'}}) {
$cl->{'nt_len'} += length($tr->spliced_seq());
if($tr->translation) {
$cl->{'aa_len'} += length($tr->translate->seq());
}
}
}
return \@clusters;
}
sub is_near {
my ($start1,$end1,$strand1, $start2, $end2, $strand2) = @_;
# cannot be clustered if not on same strand
if($strand1 != $strand2) {
return 0;
}
# check if the regions overlap
if($end1 >= $start2 && $start1 <= $end2) {
return 1;
}
if($start1 > $end2) {
return (($start1 - $end2) < NEAR) ? 1 : 0;
}
return (($start2 - $end1) < NEAR) ? 1 : 0;
}
sub transfer_xrefs {
my $hum_gene = shift;
my $chimp_transcripts = shift;
my %chimp_transcripts;
my %chimp_translations;
foreach my $tr (@$chimp_transcripts) {
$chimp_transcripts{$tr->stable_id()} ||= [];
push @{$chimp_transcripts{$tr->stable_id()}}, $tr;
my $tl = $tr->translation();
if($tl) {
$chimp_translations{$tl->stable_id()} ||= [];
push @{$chimp_translations{$tl->stable_id()}}, $tl;
}
}
foreach my $tr (@{$hum_gene->get_all_Transcripts()}) {
foreach my $chimp_tr (@{$chimp_transcripts{$tr->stable_id}}) {
foreach my $xref (@{$tr->get_all_DBEntries}) {
$chimp_tr->add_DBEntry($xref) if($KEEP_XREF{uc($xref->dbname())});
}
if($tr->display_xref() && $KEEP_XREF{uc($tr->display_xref->dbname)}) {
$chimp_tr->display_xref($tr->display_xref);
}
}
my $tl = $tr->translation();
if($tl) {
foreach my $xref (@{$tl->get_all_DBEntries}) {
foreach my $chimp_tl (@{$chimp_translations{$tl->stable_id()}}) {
$chimp_tl->add_DBEntry($xref) if($KEEP_XREF{uc($xref->dbname())});
}
}
}
}
return;
}
1;
......@@ -18,6 +18,8 @@ use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Utils::Exception qw(throw info);
use constant MAX_INTRON_LEN => 2e6;
#
# sanity checks the interim exons, and splits this
......@@ -115,9 +117,6 @@ sub check_iexons {
return check_iexons($itranscript, $itranscript_array);
}
$prev_end = $iexon->end();
$prev_start = $iexon->start();
if (!defined($transcript_strand)) {
$transcript_strand = $iexon->strand();
} elsif ($transcript_strand != $iexon->strand()) {
......@@ -134,6 +133,31 @@ sub check_iexons {
}
return check_iexons($itranscript, $itranscript_array);
}
# watch out for extremely long introns
my $intron_len = 0;
if(defined($prev_start)) {
if($iexon->strand() == 1) {
$intron_len = $iexon->start - $prev_end + 1;
} else {
$intron_len = $prev_start - $iexon->end + 1;
}
}
if($intron_len > MAX_INTRON_LEN) {
info(" very long intron, splitting transcripts");
my $keep_exon = 1;
my $first_transcript = split_itrans($itranscript, $iexon, $keep_exon);
if($first_transcript) {
push @$itranscript_array, $first_transcript;
}
return check_iexons($itranscript, $itranscript_array);
}
$prev_end = $iexon->end();
$prev_start = $iexon->start();
}
$itranscript_array ||= [];
......
......@@ -10,7 +10,7 @@ use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter);
@EXPORT_OK = qw(print_exon print_coords print_translation);
@EXPORT_OK = qw(print_exon print_coords print_translation print_three_phase_translation);
......@@ -109,4 +109,24 @@ sub print_translation {
}
sub print_three_phase_translation {
my $transcript = shift;
return if(!$transcript->translation());
my $orig_phase = $transcript->start_Exon->phase();
foreach my $phase (0,1,2) {
info("======== Phase $phase translation: ");
$transcript->start_Exon->phase($phase);
info("Peptide: " . $transcript->translate->seq() . "\n\n===============");
}
$transcript->start_Exon->phase($orig_phase);
return;
}
1;
......@@ -13,6 +13,7 @@ use StatMsg;
use Deletion;
use Insertion;
use Transcript;
use Gene;
use StatLogger;
use StatMsg;
......@@ -152,6 +153,8 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose warning);
info("Gene: ".$gene->stable_id);
my $transcripts = $gene->get_all_Transcripts();
my @finished;
foreach my $transcript (@$transcripts) {
next if(!$transcript->translation); #skip pseudo genes
......@@ -162,98 +165,102 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose warning);
my $finished_transcripts =
create_transcripts($interim_transcript, $slice_adaptor);
my $transcript_count = @$finished_transcripts;
my $translation_count = 0;
my $stop_codons_count = 0;
if($transcript_count > 1) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::SPLIT);
}
elsif($transcript_count== 0) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_SEQUENCE_LEFT);
# set the translation stable identifier on the finished transcripts
foreach my $tr (@$finished_transcripts) {
if($tr->translation() && $transcript->translation()) {
$tr->translation->stable_id($transcript->translation->stable_id);
}
}
foreach my $ftrans (@$finished_transcripts) {
if($ftrans->translation()) {
$translation_count++;
my $pep = $ftrans->translate->seq();
# This method call is optional, just for statistics gathering
# and extra sanity checking / debug output:
gen_transcript_stats($finished_transcripts);
print STDERR "\n\n$pep\n\n";
push @finished, @$finished_transcripts;
}
if($pep =~ /\*/) {
$stop_codons_count++;
}
if($store) {
Gene::store_gene($dest_db, $gene, \@finished);
}
}
}
}
# sanity check, if translation is defined we expect a peptide
if(!$pep) {
print_translation($ftrans->translation());
throw("Unexpected Translation but no peptide");
}
} else {
print STDERR "NO TRANSLATION LEFT\n";
}
}
# If there were stop codons in one of the split transcripts
# report it. Report it as 'entire' if all split transcripts had
# stops.
if($stop_codons_count) {
my $code = StatMsg::TRANSCRIPT | StatMsg::DOESNT_TRANSLATE;
if($stop_codons_count == $translation_count) {
$code |= StatMsg::ENTIRE;
} else {
$code |= StatMsg::PARTIAL;
}
StatMsg->new($code);
}
if(!$translation_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_CDS_LEFT);
}
#
# method which does some sanity checking of generated transcripts,
# gathers some stats about the completed transcripts and prints the
# created translations to STDERR
#
sub gen_transcript_stats {
my $finished_transcripts = shift;
if($translation_count) {
if($stop_codons_count) {
if($translation_count > $stop_codons_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::PARTIAL);
}
} else {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::ENTIRE);
}
}
my $transcript_count = @$finished_transcripts;
my $translation_count = 0;
my $stop_codons_count = 0;
# foreach my $ftr (@$finished_transcripts) {
# print_three_phase_translation($ftr);
# }
if($transcript_count > 1) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::SPLIT);
}
elsif($transcript_count== 0) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_SEQUENCE_LEFT);
}
if($store) {
store_gene($dest_db, $gene, $finished_transcripts);
}
foreach my $ftrans (@$finished_transcripts) {
if($ftrans->translation()) {
$translation_count++;
my $pep = $ftrans->translate->seq();
print STDERR "\n\n$pep\n\n";
if($pep =~ /\*/) {
$stop_codons_count++;
}
# sanity check, if translation is defined we expect a peptide
if(!$pep) {
print_translation($ftrans->translation());
throw("Unexpected Translation but no peptide");
}
} else {
print STDERR "NO TRANSLATION LEFT\n";
}
}
}
sub print_three_phase_translation {
my $transcript = shift;
return if(!$transcript->translation());
# If there were stop codons in one of the split transcripts
# report it. Report it as 'entire' if all split transcripts had
# stops.
if($stop_codons_count) {
my $code = StatMsg::TRANSCRIPT | StatMsg::DOESNT_TRANSLATE;
if($stop_codons_count == $translation_count) {
$code |= StatMsg::ENTIRE;
} else {
$code |= StatMsg::PARTIAL;
}
StatMsg->new($code);
}
my $orig_phase = $transcript->start_Exon->phase();
if(!$translation_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_CDS_LEFT);
}
foreach my $phase (0,1,2) {
info("======== Phase $phase translation: ");
$transcript->start_Exon->phase($phase);
info("Peptide: " . $transcript->translate->seq() . "\n\n===============");
if($translation_count) {
if($stop_codons_count) {
if($translation_count > $stop_codons_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::PARTIAL);
}
} else {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::ENTIRE);
}
}
}
$transcript->start_Exon->phase($orig_phase);
return;
}
......@@ -272,11 +279,6 @@ sub transfer_transcript {
my $human_exons = $transcript->get_all_Exons();
if (!$transcript->translation()) { # watch out for pseudogenes
info("pseudogene - discarding");
return;
}
my $chimp_cdna_pos = 0;
my $cdna_exon_start = 1;
......@@ -403,25 +405,20 @@ sub transfer_transcript {
# insert in chimp, deletion in human
#
#info("before insert, CDNA_POS= $chimp_cdna_pos");
Insertion::process_insert(\$chimp_cdna_pos, $insert_len,
$chimp_exon, $chimp_transcript);
$chimp_cdna_pos += $insert_len;
#info("after insert, CDNA_POS= $chimp_cdna_pos");
}
}
$chimp_cdna_pos += $c->length();
#info("after match (" . $c->length(). ") CDNA_POS= $chimp_cdna_pos");
}
} # foreach coord
}
}
$cdna_exon_start = $chimp_cdna_pos + 1;
# info("after exon, CDNA_POS= $chimp_cdna_pos");
$chimp_transcript->add_Exon($chimp_exon);
} # foreach exon
......@@ -555,177 +552,6 @@ sub create_transcripts {
###############################################################################
# store gene
#
# Builds Ensembl genes from the generated chimp transcripts and stores them
# in the database.
#
###############################################################################
sub store_gene {
my $db = shift;
my $hum_gene = shift; # human gene
my $ctranscripts = shift; # chimp transcripts
my $MIN_AA_LEN = 15;
my $MIN_NT_LEN = 600;
my $analysis = $db->get_AnalysisAdaptor->fetch_by_logic_name('ensembl');
# Look at the translations and convert any transcripts with stop codons
# into pseudogenes
foreach my $ct (@$ctranscripts) {
if($ct->translation && $ct->translate->seq() =~ /\*/) {
$ct->translation(undef);
}
}
# Group transcripts by their strand and scaffold. We
# cannot really build genes that spand scaffolds or strands
my (%ctrans_hash, %nt_lens, %aa_lens);
foreach my $ct (@$ctranscripts) {
my $region = $ct->slice->seq_region_name() . ':' . $ct->strand();
$ctrans_hash{$region} ||= [];
push @{$ctrans_hash{$region}}, $ct;
# keep track of how many nucleotides and amino acids are in the
# transcripts from this gene that made it to this area. If there
# are not many, the transcript should probably be rejected.
my $nt_len = length($ct->spliced_seq()) || 0;
my $aa_len = ($ct->translation()) ? length($ct->translate->seq()) : 0;
$nt_lens{$region} ||= 0;
$nt_lens{$region} += $nt_len;
$aa_lens{$region} ||= 0;
$aa_lens{$region} += $aa_len;
}
my %chimp_genes;
my $gene_adaptor = $db->get_GeneAdaptor();
foreach my $region (keys %ctrans_hash) {
# keep transcripts if there is a minimum amount of nucleotide
# OR amino acid sequence in transcripts in the same region
next if($nt_lens{$region}<$MIN_NT_LEN && $aa_lens{$region}<$MIN_AA_LEN);
# one gene for each region
my $cgene = $chimp_genes{$region} ||= Bio::EnsEMBL::Gene->new();
generate_stable_id($cgene);
# rename transcripts and add to gene
foreach my $ctrans (@{$ctrans_hash{$region}}) {
generate_stable_id($ctrans);
# rename translation
if($ctrans->translation) {
generate_stable_id($ctrans->translation);
}
$cgene->add_Transcript($ctrans);
}
# rename all of the exons
# but watch out because duplicate exons will be merged and we do not
# want to generate multiple names
my %ex_stable_ids;
foreach my $ex (@{$cgene->get_all_Exons()}) {
if($ex_stable_ids{$ex->hashkey()}) {
$ex->stable_id($ex_stable_ids{$ex->hashkey()});
} else {
generate_stable_id($ex);
$ex_stable_ids{$ex->hashkey()} = $ex->stable_id();
}
}
# set the analysis on the gene object
$cgene->analysis($analysis);
# for now just grab all HUGO xrefs, and take last one as display xref;
my $display_xref;
foreach my $gx (@{$hum_gene->get_all_DBLinks()}) {
if(uc($gx->dbname()) eq 'HUGO') {
$cgene->add_DBEntry($gx);
$display_xref = $gx;
}
}
$cgene->display_xref($display_xref) if($display_xref);
my $name = $cgene->stable_id();
$name .= '/'.$display_xref->display_id() if($display_xref);
$cgene->type('ensembl');
# store the bloody thing
print STDERR "Storing gene: $name\n";
$gene_adaptor->store($cgene);
}
return;
}
###############################################################################
# generate_stable_id
#
# Generates a stable_id for a gene, transcript, translation or exon and sets
# it on the object.
#
###############################################################################
my ($TRANSCRIPT_NUM, $GENE_NUM, $EXON_NUM, $TRANSLATION_NUM);
sub generate_stable_id {
my $object = shift;
my $SPECIES_PREFIX = 'PTR';
my $PAD = 18;
my $type_prefix;
my $num;
if($object->isa('Bio::EnsEMBL::Exon')) {
$type_prefix = 'E';
$EXON_NUM ||= 0;
$num = ++$EXON_NUM;
} elsif($object->isa('Bio::EnsEMBL::Transcript')) {
$type_prefix = 'T';
$TRANSCRIPT_NUM ||= 0;
$num = ++$TRANSCRIPT_NUM;
} elsif($object->isa('Bio::EnsEMBL::Gene')) {
$type_prefix = 'G';
$GENE_NUM ||= 0;
$num = ++$GENE_NUM;
} elsif($object->isa('Bio::EnsEMBL::Translation')) {
$type_prefix = 'P';
$TRANSLATION_NUM ||= 0;
$num = ++$TRANSLATION_NUM;
} else {
throw('Unknown object type '.ref($object).'. Cannot create stable_id.');
}
my $prefix = "ENS${SPECIES_PREFIX}${type_prefix}";
my $pad = $PAD - length($prefix) - length($num);
$object->version(1);
$object->stable_id($prefix . ('0'x$pad) . $num);
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment