diff --git a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm index 39c96c0667ef9aaebf42a9b1945ec031c0f687c8..76d1086452be54737870f7c2a3a7c4e25cd2ebb7 100644 --- a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm +++ b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm @@ -88,47 +88,47 @@ my %transcript_so_mapping = 'transcribed_unprocessed_pseudogene'=> 'SO:0000516', # pseudogenic_transcript 'processed_pseudogene' => 'SO:0000043', # processed_pseudogene 'unprocessed_pseudogene' => 'SO:0000336', # pseudogene - 'unitary_pseudogene' => 'SO:0000336', + 'unitary_pseudogene' => 'SO:0000336', # pseudogene 'pseudogene' => 'SO:0000336', # pseudogene - 'transcribed_processed_pseudogene' => 'SO:0000043', + 'transcribed_processed_pseudogene' => 'SO:0000043', # processed_pseudogene 'retrotransposed' => 'SO:0000569', #retrotransposed - 'ncrna_host' => 'SO:0000483', - 'polymorphic_pseudogene' => 'SO:0000336', - 'lincRNA' => 'SO:0001463', - 'ncrna_host' => 'SO:0000483', - '3prime_overlapping_ncrna' => 'SO:0000483', - 'TR_V_gene' => 'SO:0000466', - 'TR_V_pseudogene' => 'SO:0000336', + 'ncrna_host' => 'SO:0000483', # nc_primary_transcript + 'polymorphic_pseudogene' => 'SO:0000336', # pseudogene + 'lincRNA' => 'SO:0001463', # lincRNA + 'ncrna_host' => 'SO:0000483', # nc_primary_transcript + '3prime_overlapping_ncrna' => 'SO:0000483', # nc_primary_transcript + 'TR_V_gene' => 'SO:0000466', # V_gene_segment + 'TR_V_pseudogene' => 'SO:0000336', # pseudogene 'TR_J_gene' => 'SO:0000470', 'IG_C_gene' => 'SO:0000478', - 'IG_C_pseudogene' => 'SO:0000336', - 'TR_C_gene' => 'SO:0000478', - 'IG_J_pseudogene' => 'SO:0000336', + 'IG_C_pseudogene' => 'SO:0000336', # pseudogene + 'TR_C_gene' => 'SO:0000478', # C_gene_segment + 'IG_J_pseudogene' => 'SO:0000336', # pseudogene 'miRNA' => 'SO:0000276', #miRNA - 'miRNA_pseudogene' => 'SO:0000336', + 'miRNA_pseudogene' => 'SO:0000336', # pseudogene 'disrupted_domain' => 'SO:0000681', # 
aberrant_processed_transcript 'rRNA' => 'SO:0000252', #rRNA - 'rRNA_pseudogene' => 'SO:0000777', - 'scRNA_pseudogene' => 'SO:0000336', + 'rRNA_pseudogene' => 'SO:0000777', # pseudogenic_rRNA + 'scRNA_pseudogene' => 'SO:0000336', # pseudogene 'snoRNA' => 'SO:0000275', # snoRNA - 'snoRNA_pseudogene' => 'SO:0000336', + 'snoRNA_pseudogene' => 'SO:0000336', # pseudogene 'snRNA' => 'SO:0000274', # snRNA - 'snRNA_pseudogene' => 'SO:0000336', + 'snRNA_pseudogene' => 'SO:0000336', # pseudogene ); my %utr_so_mapping = ( - 'UTR' => 'SO:0000203', - 'five_prime_utr' => 'SO:0000204', - 'three_prime_utr' => 'SO:0000205' + 'UTR' => 'SO:0000203', # UTR + 'five_prime_utr' => 'SO:0000204', # five_prime_UTR + 'three_prime_utr' => 'SO:0000205' # three_prime_UTR ); my %region_so_mapping = ( - 'chromosome' => 'SO:0000340', - 'supercontig' => 'SO:0000148', - 'scaffold' => 'SO:0000148', - 'contig' => 'SO:0000149' + 'chromosome' => 'SO:0000340', # chromosome + 'supercontig' => 'SO:0000148', # supercontig + 'scaffold' => 'SO:0000148', # supercontig + 'contig' => 'SO:0000149' # contig ); my %feature_so_mapping = @@ -137,9 +137,9 @@ my %feature_so_mapping = 'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene 'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript 'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon - 'Bio::EnsEMBL::UTR' => 'SO:0000203', - 'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147', - 'Bio::EnsEMBL::CDS' => 'SO:0000316', + 'Bio::EnsEMBL::UTR' => 'SO:0000203', # UTR + 'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147', # exon + 'Bio::EnsEMBL::CDS' => 'SO:0000316', # CDS 'Bio::EnsEMBL::Slice' => 'SO:0000001', # region 'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region 'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region diff --git a/modules/t/sequenceOntologyMapper.t b/modules/t/sequenceOntologyMapper.t index 1aec5bfba5a24ee0055483faab245bf055498d0b..b6c37236d2dea2de0a0e67387c6ca205b6d91d25 100644 --- a/modules/t/sequenceOntologyMapper.t +++ 
b/modules/t/sequenceOntologyMapper.t @@ -77,7 +77,7 @@ my $mappings = # exons { obj => Bio::EnsEMBL::Exon->new, accession => 'SO:0000147', name => 'exon' }, # slices - { obj => $db->get_SliceAdaptor->fetch_by_region('chromosome', '20', 30_270_000, 31_200_000), accession => 'SO:0000001', name => 'region' }, + { obj => $db->get_SliceAdaptor->fetch_by_region('chromosome', '20', 30_270_000, 31_200_000), accession => 'SO:0000340', name => 'chromosome' }, # simple features { obj => Bio::EnsEMBL::SimpleFeature->new(), accession => 'SO:0001411', name => 'biological_region' }, # misc features diff --git a/modules/t/test-genome-DBs/ontology/ontology/term.txt b/modules/t/test-genome-DBs/ontology/ontology/term.txt index a1135770ff821a14df36e70115aad6ce5f852a91..c572b72cc4ebb9169c5cd9e7b42f195a5b501800 100644 --- a/modules/t/test-genome-DBs/ontology/ontology/term.txt +++ b/modules/t/test-genome-DBs/ontology/ontology/term.txt @@ -161,3 +161,4 @@ 40788 5 NULL SO:0001217 protein_coding_gene """"" []" 0 0 40842 5 NULL SO:0001272 tRNA_gene """"" []" 0 0 41072 5 NULL SO:0001503 processed_transcript """A transcript for which no open reading frame has been identified and for which no other function has been determined."" [MGI:hdeen]" 0 0 +41494 5 SOFA SO:0000340 chromosome "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] 0 0