From 694f7b645b5a69232be9b5773c3d9f3f6a57cce0 Mon Sep 17 00:00:00 2001
From: Magali Ruffier <mr6@ebi.ac.uk>
Date: Fri, 15 May 2015 13:35:33 +0100
Subject: [PATCH] Add SO term names as comments in the mapper; update slice test and ontology test data

---
 .../EnsEMBL/Utils/SequenceOntologyMapper.pm   | 54 +++++++++----------
 modules/t/sequenceOntologyMapper.t            |  2 +-
 .../ontology/ontology/term.txt                |  1 +
 3 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm
index 39c96c0667..76d1086452 100644
--- a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm
+++ b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm
@@ -88,47 +88,47 @@ my %transcript_so_mapping =
    'transcribed_unprocessed_pseudogene'=> 'SO:0000516', # pseudogenic_transcript
    'processed_pseudogene' 				=> 'SO:0000043', # processed_pseudogene
    'unprocessed_pseudogene' 			=> 'SO:0000336', # pseudogene
-   'unitary_pseudogene'				=> 'SO:0000336',
+   'unitary_pseudogene'				=> 'SO:0000336',        # pseudogene
    'pseudogene' 						=> 'SO:0000336', # pseudogene
-   'transcribed_processed_pseudogene'	=> 'SO:0000043', 
+   'transcribed_processed_pseudogene'	=> 'SO:0000043',                # processed_pseudogene
    'retrotransposed' 					=> 'SO:0000569', #retrotransposed
-   'ncrna_host' 						=> 'SO:0000483',
-   'polymorphic_pseudogene'			=> 'SO:0000336',
-   'lincRNA'							=> 'SO:0001463',
-   'ncrna_host'						=> 'SO:0000483',
-   '3prime_overlapping_ncrna'			=> 'SO:0000483',
-   'TR_V_gene'							=> 'SO:0000466',
-   'TR_V_pseudogene'					=> 'SO:0000336',
+   'ncrna_host' 						=> 'SO:0000483', # nc_primary_transcript
+   'polymorphic_pseudogene'			=> 'SO:0000336',        # pseudogene
+   'lincRNA'							=> 'SO:0001463', # lincRNA
+   'ncrna_host'						=> 'SO:0000483', # nc_primary_transcript
+   '3prime_overlapping_ncrna'			=> 'SO:0000483',         # nc_primary_transcript
+   'TR_V_gene'							=> 'SO:0000466', # V_gene_segment
+   'TR_V_pseudogene'					=> 'SO:0000336', # pseudogene
    'TR_J_gene'							=> 'SO:0000470',
    'IG_C_gene'							=> 'SO:0000478',
-   'IG_C_pseudogene'					=> 'SO:0000336',
-   'TR_C_gene'							=> 'SO:0000478',
-   'IG_J_pseudogene'					=> 'SO:0000336',
+   'IG_C_pseudogene'					=> 'SO:0000336', # pseudogene
+   'TR_C_gene'							=> 'SO:0000478', # C_gene_segment
+   'IG_J_pseudogene'					=> 'SO:0000336', # pseudogene
    'miRNA'								=> 'SO:0000276', #miRNA
-   'miRNA_pseudogene'					=> 'SO:0000336',
+   'miRNA_pseudogene'					=> 'SO:0000336', # pseudogene
    'disrupted_domain' 					=> 'SO:0000681', # aberrant_processed_transcript
    'rRNA' 								=> 'SO:0000252', #rRNA
-   'rRNA_pseudogene'					=> 'SO:0000777', 
-   'scRNA_pseudogene'					=> 'SO:0000336',
+   'rRNA_pseudogene'					=> 'SO:0000777', # pseudogenic_rRNA
+   'scRNA_pseudogene'					=> 'SO:0000336', # pseudogene
    'snoRNA' 							=> 'SO:0000275', # snoRNA
-   'snoRNA_pseudogene'					=> 'SO:0000336',
+   'snoRNA_pseudogene'					=> 'SO:0000336', # pseudogene
    'snRNA'								=> 'SO:0000274', # snRNA
-   'snRNA_pseudogene'					=> 'SO:0000336', 
+   'snRNA_pseudogene'					=> 'SO:0000336',  # pseudogene
   );
 
 my %utr_so_mapping =
   (
-   'UTR'             => 'SO:0000203',
-   'five_prime_utr'  => 'SO:0000204',
-   'three_prime_utr' => 'SO:0000205'
+   'UTR'             => 'SO:0000203', # UTR
+   'five_prime_utr'  => 'SO:0000204', # five_prime_UTR
+   'three_prime_utr' => 'SO:0000205'  # three_prime_UTR
   );
 
 my %region_so_mapping =
   (
-   'chromosome'  => 'SO:0000340',
-   'supercontig' => 'SO:0000148',
-   'scaffold'    => 'SO:0000148',
-   'contig'      => 'SO:0000149'
+   'chromosome'  => 'SO:0000340', # chromosome
+   'supercontig' => 'SO:0000148', # supercontig
+   'scaffold'    => 'SO:0000148', # supercontig
+   'contig'      => 'SO:0000149'  # contig
   );
 
 my %feature_so_mapping = 
@@ -137,9 +137,9 @@ my %feature_so_mapping =
    'Bio::EnsEMBL::Gene' => 'SO:0000704',    # gene
    'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript
    'Bio::EnsEMBL::Exon' => 'SO:0000147',       # exon
-   'Bio::EnsEMBL::UTR'  => 'SO:0000203',
-   'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147',
-   'Bio::EnsEMBL::CDS'   => 'SO:0000316',
+   'Bio::EnsEMBL::UTR'  => 'SO:0000203',       # UTR
+   'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147', # exon
+   'Bio::EnsEMBL::CDS'   => 'SO:0000316',      # CDS
    'Bio::EnsEMBL::Slice' => 'SO:0000001',      # region
    'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region
    'Bio::EnsEMBL::MiscFeature' => 'SO:0001411',	  # biological_region
diff --git a/modules/t/sequenceOntologyMapper.t b/modules/t/sequenceOntologyMapper.t
index 1aec5bfba5..b6c37236d2 100644
--- a/modules/t/sequenceOntologyMapper.t
+++ b/modules/t/sequenceOntologyMapper.t
@@ -77,7 +77,7 @@ my $mappings =
    # exons
    { obj => Bio::EnsEMBL::Exon->new, accession => 'SO:0000147', name => 'exon' },
    # slices
-   { obj => $db->get_SliceAdaptor->fetch_by_region('chromosome', '20', 30_270_000, 31_200_000), accession => 'SO:0000001', name => 'region' },
+   { obj => $db->get_SliceAdaptor->fetch_by_region('chromosome', '20', 30_270_000, 31_200_000), accession => 'SO:0000340', name => 'chromosome' },
    # simple features
    { obj => Bio::EnsEMBL::SimpleFeature->new(), accession => 'SO:0001411', name => 'biological_region' },
    # misc features
diff --git a/modules/t/test-genome-DBs/ontology/ontology/term.txt b/modules/t/test-genome-DBs/ontology/ontology/term.txt
index a1135770ff..c572b72cc4 100644
--- a/modules/t/test-genome-DBs/ontology/ontology/term.txt
+++ b/modules/t/test-genome-DBs/ontology/ontology/term.txt
@@ -161,3 +161,4 @@
 40788	5	NULL	SO:0001217	protein_coding_gene	""""" []"	0	0
 40842	5	NULL	SO:0001272	tRNA_gene	""""" []"	0	0
 41072	5	NULL	SO:0001503	processed_transcript	"""A transcript for which no open reading frame has been identified and for which no other function has been determined."" [MGI:hdeen]"	0	0
+41494	5	SOFA	SO:0000340	chromosome	"Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma]	0	0
-- 
GitLab