merge from branch-ensembl-dec09

6c9580d1 · Steve Trevanion · 31d97ee0 · 6c9580d1 · 6c9580d1 · 6c9580d1
Commit 6c9580d1 authored 15 years ago by Steve Trevanion
--- a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm
+++ b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm
@@ -1124,6 +1124,7 @@ sub _by_chr_num {
                large chromosomes
  Arg[2]      : (optional) Boolean to include duplicate regions, ie PAR or not
                (default is no)
+  Arg[3]      : (optional) Coordsystem version to retrieve

  Example     : my $chr_slices = $support->split_chromosomes_by_size;
                foreach my $block_size (keys %{ $chr_slices }) {
@@ -1150,6 +1151,7 @@ sub split_chromosomes_by_size {
  my $self   = shift;
  my $cutoff = shift || 5000000;
  my $dup    = shift || 0;
+  my $cs_version = shift;
  my $slice_adaptor = $self->dba->get_SliceAdaptor;
  my $top_slices;
  if ($self->param('chromosomes')) {
@@ -1157,7 +1159,7 @@ sub split_chromosomes_by_size {
      push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
    }
  } else {
-    $top_slices = $slice_adaptor->fetch_all('chromosome',undef,0,$dup);
+    $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,0,$dup);
  }

  my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);

--- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm
+++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm
@@ -99,7 +99,7 @@ sub check_remarks_and_update_names {
  if ($@) {
    $g_name = $gene->get_all_Attributes('name')->[0]->value;
  }
-  my $gene_remark = 'This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in  the assembly';
+  my $gene_remark = 'This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly';
  my $attrib = [
    Bio::EnsEMBL::Attribute->new(
      -CODE => 'remark',
@@ -145,7 +145,7 @@ sub check_remarks_and_update_names {
    }
  }

-  #patch transcript names according to length and CDS
+  ##patch transcript names according to length and CDS
  $gene_c++;

  #separate coding and non_coding transcripts

--- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm
+++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm
@@ -127,10 +127,13 @@ sub check_CDS_start_end_remarks_loutre {
  foreach my $attribute (@{$trans->get_all_Attributes()}) {
    $attributes{$attribute->code} = $attribute;
  }
+#  warn $trans->stable_id;
+#  warn Data::Dumper::Dumper(\%attributes);
  my $coding_end   = $trans->cdna_coding_end;
  my $coding_start = $trans->cdna_coding_start;
  my $trans_end    = $trans->length;
  my $trans_seq    = $trans->seq->seq;
+  my $stop_codon_offset = 3 + $trans->translation->end_Exon->end_phase;
  my $stop_codon   = substr($trans_seq, $coding_end-3, 3);
  my $start_codon  = substr($trans_seq, $coding_start-1, 3);

@@ -138,37 +141,49 @@ sub check_CDS_start_end_remarks_loutre {
  my $results;

  #extra CDS end not found remarks
-  if ( ($attributes{'cds_end_NF'}->value == 1)
-	 && ($coding_end != $trans_end) 
+  if ($attributes{'cds_end_NF'}) {
+    if ( ($attributes{'cds_end_NF'}->value == 1)
+	   && ($coding_end != $trans_end) 
 	   && ( grep {$_ eq $stop_codon} @stops) ) {
-    $results->{'END_EXTRA'} = 1;
+#      warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon";
+#      warn $trans->translation->end_Exon->end_phase;
+      $results->{'END_EXTRA'} = $stop_codon; # store the codon at the CDS end (a stop, per the grep above)
+    }
   }
  #missing CDS end not found remark
  if ( $coding_end == $trans_end ) {
-    if ($attributes{'cds_end_NF'}->value == 0 ) {
-      if (grep {$_ eq $stop_codon} @stops) {
-	$results->{'END_MISSING_2'} = 1;
-      }
-      else {
-	$results->{'END_MISSING_1'} = $stop_codon;
+    if ($attributes{'cds_end_NF'}) {
+      if ($attributes{'cds_end_NF'}->value == 0 ) {
+	if (! grep {$_ eq $stop_codon} @stops) {
+#	  warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon";
+#	  warn $trans->translation->end_Exon->end_phase;
+	  $results->{'END_MISSING'}{'WRONG'} = $stop_codon;
+	}
      }
    }
+    elsif (! grep {$_ eq $stop_codon} @stops) {
+      $results->{'END_MISSING'}{'ABSENT'} = $stop_codon;
+    }
  }
-  #extra CDS start not found remark
-  if ( ($attributes{'cds_start_NF'}->value == 1 )
-	 && ($coding_start != 1)
+  #extra CDS start not found remark 
+  if ( $attributes{'cds_start_NF'}) {
+    if ( ($attributes{'cds_start_NF'}->value == 1 )
 	   && ($start_codon eq 'ATG') ) {
-    $results->{'START_EXTRA'} = 1;
+      $results->{'START_EXTRA'} = $start_codon;
+    }
  }
  #missing CDS start not found remark
  if ( $coding_start == 1) {
-    if ( $attributes{'cds_start_NF'}->value == 0 ) {
-      if ($start_codon eq 'ATG') {
-	$results->{'START_MISSING_2'} = 1;
-      } else {
-	$results->{'START_MISSING_1'} = $start_codon;
+    if ( $attributes{'cds_start_NF'} ) {
+      if ( $attributes{'cds_start_NF'}->value == 0 ) {
+	if ($start_codon ne 'ATG') {
+	  $results->{'START_MISSING'}{'WRONG'} = $start_codon;
+	}
      }
    }
+    elsif ($start_codon ne 'ATG') {
+      $results->{'START_MISSING'}{'ABSENT'} = $start_codon;
+    }
  }
  return $results;
 }
@@ -206,6 +221,14 @@ sub check_for_stops {
    my $tsi = $trans->stable_id;
    my $tID = $trans->dbID;
    my $tname = $trans->get_all_Attributes('name')->[0]->value;
+
+    foreach my $rem (@{$trans->get_all_Attributes('hidden_remark')}) {
+      if ($rem->value =~ /not_for_Vega/) {
+	$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
+	next TRANS;
+      }
+    }
+
    $support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);

    my $peptide;