my$gene_remark='This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly';
$self->log("Fragmented loci annotation remark for gene $gid already exists\n");
}
#add gene_attrib
else{
if(!$self->param('dry_run')){
$aa->store_on_Gene($gid,$attrib);
}
$self->log("$action correctly formatted fragmented loci annotation remark for gene $gsi\n");
}
return(0,$gene_c,$trans_c);
}
#log if it's been reported before since the gene should have a remark.
elsif($seen_genes->{$gsi}eq'fragmented'){
$self->log_warning("PREVIOUS: $action correctly formatted fragmented loci annotation remark for gene $gsi (has previously been OKeyed by Havana as being fragmented but has no Annotation remark, please add one!)\n");
print$k_flist_fh"$gsi\n";
#add gene_attrib anyway.
if(!$self->param('dry_run')){
$aa->store_on_Gene($gid,$attrib);
}
return(0,$gene_c,$trans_c);
}
#otherwise patch transcript names according to length and CDS
else{
$gene_c++;
my@trans=$gene->get_all_Transcripts();
#separate coding and non_coding transcripts
my$coding_trans=[];
my$noncoding_trans=[];
foreachmy$trans(@{$gene->get_all_Transcripts()}){
if($trans->translate){
push@$coding_trans,$trans;
}
else{
push@$noncoding_trans,$trans;
}
}
#sort transcripts coding > non-coding, then on length
my$c=0;
$self->log("\nPatching names according to CDS and length:\n",1);
$self->log_warning("Problem looking for overlapping transcripts for gene $gsi (is_current = 0 ?). Skipping this bit\n");
}
#if the transcripts don't overlap
elsif(@{$non_overlaps}){
my$tsi_string;
foreachmy$id(@{$non_overlaps}){
my$string="$id [ $ids_to_names{$id} ] ";
$tsi_string.=$string;
}
$self->log_warning("NEW: Non-overlapping: $gsi ($g_name) has non-overlapping transcripts ($tsi_string) with duplicated Vega names, and it has no \'Annotation_remark- fragmented_loci\' on the gene or \'\%fragmen\%\' remark on any transcripts. Neither has it been OKeyed by Havana before. Transcript names are being patched but this needs checking by Havana.\n");
#log gsi (to be sent to Havana)
print$n_flist_fh"$gsi\n";
}
#...otherwise if the transcripts do overlap
elsif($self->param('verbose')){
$self->log_warning("NEW: Overlapping: $gsi ($g_name) has overlapping transcripts ($all_t_names) with Vega duplicated names and it has no \'Annotation_remark- fragmented_loci\' on the gene or \'\%fragmen\%\' remark on any transcripts. Neither has it been OKeyed by Havana before. Transcript names are being patched but this could be checked by Havana if they were feeling keen.\n");
print$n_flist_fh"$gsi\n";
}
}
}
=head2 get_havana_fragmented_loci_comments

  Args        : none
  Example     : my $results = $support->get_havana_fragmented_loci_comments
  Description : parses the Havana comments read from the DATA filehandle
                (the __DATA__ section) of this module
  Returntype  : hashref

=cut
sub get_havana_fragmented_loci_comments {
    # Builds a lookup of "object => comment" from the DATA filehandle of the
    # current package, which is expected to hold one "object = comment"
    # record per line.
    #
    # Takes no arguments that it uses. Returns a hashref keyed on the text
    # before the first '=', with the text after it as the value; both are
    # stripped of leading and trailing whitespace.

    # Start from an empty hash so that a DATA section with no records yields
    # {} rather than undef.
    my $seen_genes = {};

    # Read into a lexical: a bare while (<DATA>) assigns to the global $_
    # without localising it and would clobber the caller's value.
    while ( my $line = <DATA> ) {

        # skip whitespace-only lines and any line containing a '#'
        next if $line =~ /^\s+$/ or $line =~ /#/;

        # LIMIT of 2: split on the first '=' only, so a comment that itself
        # contains '=' is kept whole instead of being truncated
        my ( $obj, $comment ) = split /=/, $line, 2;
        $obj =~ s/^\s+|\s+$//g;

        # a line without '=' has no comment: the key is still recorded (with
        # an undef value) but no substitution is attempted on undef
        $comment =~ s/^\s+|\s+$//g if defined $comment;

        $seen_genes->{$obj} = $comment;
    }

    return $seen_genes;
}
#details of genes with duplicated transcript names that have already been reported to Havana
#identified as either fragmented or as being OK to patch