Commit 4482a11c authored by Javier Santoyo-Lopez's avatar Javier Santoyo-Lopez
Browse files

changed write_acedb to warn about transcripts

in other contigs
parent 5b987cb5
......@@ -522,8 +522,14 @@ sub get_AnnSeq {
=head2 write_acedb
Title : write_acedb
Usage : $contig->write_acedb(\*FILEHANDLE);
$contig->write_acedb(\*FILEHANDLE,$ace_seq_name);
Usage : $contig->write_acedb(\*FILEHANDLE);
$contig->write_acedb(\*FILEHANDLE,$ace_seq_name, $type, $supp_evid);
$ace_seq_name refers to the name of the aceDB-clone name
$type refers to the type of gene in the ensEMBL database
$supp_evid, indicates supporting evidences
by default: $ace_seq_name is the accession number
$type is ensembl
$supp_evid is 0
Function: Dumps exon, transcript and gene objects in acedb format
Returns :
Args :
......@@ -531,93 +537,97 @@ sub get_AnnSeq {
=cut
sub write_acedb {
my ($self,$fh,$seqname) = @_;
my ($self,$fh,$seqname,$type,$supp_evid) = @_;
my $contig_id = $self->id();
$type ||= 'ensembl';
$supp_evid ||= 0;
$seqname ||= $contig_id;
# get all the genes of this contig and analyze each of them
foreach my $gene ($self->get_Genes_by_Type('ensembl',1)){
# get all genes
my @genes = $self->get_Genes_by_Type( $type, $supp_evid );
# exit if the clone has no genes
unless (@genes) {
print STDERR "'$seqname' has no genes\n";
return;
}
GENE:
foreach my $gene ( @genes ){
my $gene_id=$gene->id;
#get all the transcripts of a particular gene.
my @transcripts_in_gene = $gene->each_Transcript;
# get all the transcripts of this gene.
my @trans_in_gene = $gene->each_Transcript;
#Get all exons for every transcript
foreach my $trans ( @transcripts_in_gene ) {
my $trans_id=$trans->id;
my @exons = $trans->each_Exon;
my $number_of_exons = @exons;
my @exons_in_contig;
my @exon_number_in_contig;
my $exon_number_in_transcript=0;
# get another gene if this one has no transcripts (pseudogene)
unless (@trans_in_gene ) {
print STDERR "'$seqname' contains a gene with no transcripts (gene_id: '$gene_id')\n";
next GENE;
}
# for each transcript
TRANSCRIPT:
foreach my $trans ( @trans_in_gene ) {
my $trans_id=$trans->id;
# get all exons of this transcript
my @exons = $trans->each_Exon;
# get transcript exons which belong to the contig
my @exons_in_contig;
# check if the transcript have exons in ohter contigs
EXON:foreach my $exon ( @exons ) {
$exon_number_in_transcript++;
if ( $exon->contig_id eq $contig_id ) {
push ( @exons_in_contig,$exon );
push ( @exon_number_in_contig,$exon_number_in_transcript);
EXON:
foreach my $exon ( @exons ) {
if ( $exon->contig_id eq $contig_id ) {
push ( @exons_in_contig,$exon );
} else { next EXON; }
}
my $number_of_exons_in_transcript = $exon_number_in_transcript;
# check the strand of the transcript ($tsrand == 1 means sense strand)
# and get the coordinates of the transcript in the contig
my $tstrand = $exons_in_contig[0]->strand;
my ($tstart,$tend);
if( $tstrand == 1 ) {
$tstart = $exons_in_contig[0]->start;
$tend = $exons_in_contig[$#exons_in_contig]->end;
} else {
$tstart = $exons_in_contig[0]->end;
$tend = $exons_in_contig[$#exons_in_contig]->start;
}
# start .ace file printing...
print $fh "Sequence $seqname\n";
print $fh "Subsequence $trans_id $tstart $tend\n\n";
# print coordinates of the transcript relative to the contig
print $fh "Sequence $trans_id\nSource $seqname\nCDS\nCDS_predicted_by EnsEMBL\nMethod EnsEMBL\n";
#print coordinates of each exon relative to the transcript
foreach my $exon ( @exons_in_contig ) {
if (@exons_in_contig) {
# check the strand and get the coordinates
my $tstrand = $exons_in_contig[0]->strand;
my ($tstart,$tend);
if( $tstrand == 1 ) {
print $fh "Source_Exons ", ($exon->start - $tstart + 1)," ",($exon->end - $tstart +1), "\n";
$tstart = $exons_in_contig[0]->start;
$tend = $exons_in_contig[$#exons_in_contig]->end;
} else {
print $fh "Source_Exons ", ($tstart - $exon->end +1 ), " ",($tstart - $exon->start+1),"\n";
}
$tstart = $exons_in_contig[0]->end;
$tend = $exons_in_contig[$#exons_in_contig]->start;
}
# start .ace file printing...
print $fh "Sequence $seqname\n";
print $fh "Subsequence $trans_id $tstart $tend\n\n";
# print coordinates of the transcript relative to the contig
print $fh "Sequence $trans_id\nSource $seqname\nCDS\nCDS_predicted_by EnsEMBL\nMethod EnsEMBL\n";
# print coordinates of each exon relative to the transcript
foreach my $exon ( @exons_in_contig ) {
if( $tstrand == 1 ) {
print $fh "Source_Exons ", ($exon->start - $tstart + 1)," ",($exon->end - $tstart +1), "\n";
} else {
print $fh "Source_Exons ", ($tstart - $exon->end +1 ), " ",($tstart - $exon->start+1),"\n";
}
}
# indicate end or start not found for transcript across several contigs
if ($exons[0] != $exons_in_contig[0]) {
print $fh "Start_not_found\n";
}
if ($exons[$#exons] != $exons_in_contig[$#exons_in_contig]) {
print $fh "End_not_found\n";
}
print $fh "\n\n";
} else {
print STDERR "'$trans_id' has no exons in '$seqname'\n";
next TRANSCRIPT;
}
#if the transcript possesses exons in other contigs, indicate which end is missing
my $number_of_first_exon_in_contig = $exon_number_in_contig[0];
my $number_of_last_exon_in_contig = $exon_number_in_contig[$#exon_number_in_contig];
if ( $number_of_first_exon_in_contig != 1) {
print $fh "Start_not_found\n";
}
if ( $number_of_last_exon_in_contig != $number_of_exons_in_transcript ) {
print $fh "End_not_found\n";
}
print $fh "\n\n";
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment