diff --git a/modules/Bio/EnsEMBL/Gene.pm b/modules/Bio/EnsEMBL/Gene.pm index 16840e0e35e166582af8d0d8ae98090feedb3fc8..78453a4972e4b5ac3f7623764680061b51ab42ee 100755 --- a/modules/Bio/EnsEMBL/Gene.pm +++ b/modules/Bio/EnsEMBL/Gene.pm @@ -1524,6 +1524,12 @@ sub summary_as_hash { my $havana_gene = $self->havana_gene(); $summary_ref->{'havana_gene'} = $havana_gene->display_id() if defined $havana_gene; $summary_ref->{'havana_version'} = $havana_gene->version() if defined $havana_gene; + + ## Stable identifier of the parent gene this gene was projected from + my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_g"); + if (@{$proj_parent_attributes}) { + $summary_ref->{'projection_parent_gene'} = $proj_parent_attributes->[0]->value; + } return $summary_ref; } diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm index cfdc9c4bc09a8837b8cfd6abfbc45b8055d5c383..ead059733956e7b1cc01f55154ae1e4d69ffeed1 100755 --- a/modules/Bio/EnsEMBL/Transcript.pm +++ b/modules/Bio/EnsEMBL/Transcript.pm @@ -3139,6 +3139,12 @@ sub summary_as_hash { $summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds(); $summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl(); $summary_ref->{'tag'} = 'basic' if $self->gencode_basic(); + + ## Stable identifier of the parent transcript this transcript was projected from + my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_t"); + if (@{$proj_parent_attributes}) { + $summary_ref->{'projection_parent_transcript'} = $proj_parent_attributes->[0]->value; + } return $summary_ref; } diff --git a/modules/Bio/EnsEMBL/Utils/IO/GTFSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/GTFSerializer.pm index 7c19ff49a05a47f0173dc0a257615888e040e53b..9bb4ec5a19c0d3a0f61f96443a85eae61c1f0da2 100644 --- a/modules/Bio/EnsEMBL/Utils/IO/GTFSerializer.pm +++ b/modules/Bio/EnsEMBL/Utils/IO/GTFSerializer.pm @@ -458,7 +458,7 @@ sub _print_attribs { } #add projection parent - my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_g"); + my $proj_parent_attributes = $gene->get_all_Attributes("proj_parent_g"); if (@{$proj_parent_attributes}) { my $value = $proj_parent_attributes->[0]->value; print $fh qq{ projection_parent_gene "${value}";}; diff --git a/modules/t/gffSerialiser.t b/modules/t/gffSerialiser.t index 21f68991a49a6d1455126fc03a98df17fe0dc937..bfaa50befc33243c01c9f74dad84732c089f2195 100644 --- a/modules/t/gffSerialiser.t +++ b/modules/t/gffSerialiser.t @@ -76,7 +76,7 @@ OUT #Have to do this outside of the HERETO thanks to tabs $expected .= join("\t", qw/20 ensembl region 30274334 30300924 . + ./, - 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;version=1' + 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1' ); $expected .= "\n"; @@ -122,7 +122,7 @@ OUT #Have to do this outside of the HERETO thanks to tabs $expected .= join("\t", qw/20 wibble region 30274334 30300924 . + ./, - 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1' + 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1' ); $expected .= "\n"; assert_gff3($gene, $expected, 'Gene with custom source serialises to GFF3 as expected. Source is wibble'); @@ -132,7 +132,7 @@ OUT OUT $expected .= join("\t", qw/20 ensembl region 30274334 30298904 . + ./, - 'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;transcript_id=ENST00000310998;version=1' + 'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;projection_parent_transcript=ENSG_PARENT_TRANSCRIPT;transcript_id=ENST00000310998;version=1' ); $expected .= "\n"; assert_gff3($gene->canonical_transcript(), $expected, 'Transcript with custom source serialises to GFF3 as expected. Source is wibble'); @@ -206,7 +206,7 @@ OUT #Have to do this outside of the HERETO thanks to tabs $expected .= join("\t", qw/20 ensembl region 30274334 30300924 . + ./, - 'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1' + 'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1' ); $expected .= "\n"; diff --git a/modules/t/gtfSerialiser.t b/modules/t/gtfSerialiser.t index f359558030f97564e6208f3eac6aa3f470059c0a..1de1d8eabafa82aeb98f38462e049076605123e1 100644 --- a/modules/t/gtfSerialiser.t +++ b/modules/t/gtfSerialiser.t @@ -72,14 +72,14 @@ my $db = $mtdb->get_DBAdaptor("core"); $gtf_serializer->print_Gene($gene); my $gtf = <<GTF; 20\tensembl\tgene\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; -20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; -20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; +20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT"; GTF eq_or_diff(${$fh->string_ref}, $gtf, 'Checking custom Gene object dumps UTRs, Selenocysteine, seleno tag and CCDS'); } @@ -88,31 +88,31 @@ GTF my $transcripts_gtf = { ENST00000310998 => "#!genome-version NCBI33 -20\tensembl\ttranscript\t30274334\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; -20\tensembl\texon\t30274334\t30274425\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155821\"; exon_version \"1\"; -20\tensembl\tCDS\t30274334\t30274425\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; -20\tensembl\texon\t30284451\t30284562\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000859878\"; exon_version \"1\"; -20\tensembl\tCDS\t30284451\t30284562\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; -20\tensembl\texon\t30285597\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000661128\"; exon_version \"1\"; -20\tensembl\tCDS\t30285597\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; -20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\"; -20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; -20\tensembl\texon\t30296506\t30296579\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155786\"; exon_version \"1\"; -20\tensembl\tCDS\t30296506\t30296579\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; -20\tensembl\texon\t30298823\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001180831\"; exon_version \"1\"; -20\tensembl\tCDS\t30298823\t30298904\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; +20\tensembl\ttranscript\t30274334\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30274334\t30274425\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155821\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30274334\t30274425\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30284451\t30284562\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000859878\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30284451\t30284562\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30285597\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000661128\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30285597\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30296506\t30296579\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155786\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30296506\t30296579\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\texon\t30298823\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001180831\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; +20\tensembl\tCDS\t30298823\t30298904\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\"; ", ENST00000278995 => "#!genome-version NCBI33 -20\tensembl\ttranscript\t30285705\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; -20\tensembl\texon\t30285705\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991635\"; exon_version \"1\"; -20\tensembl\tCDS\t30285705\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; -20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\"; -20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; -20\tensembl\texon\t30298823\t30298913\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991636\"; exon_version \"1\"; -20\tensembl\tCDS\t30298823\t30298913\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; -20\tensembl\texon\t30300869\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991637\"; exon_version \"1\"; -20\tensembl\tCDS\t30300869\t30300924\t.\t+\t2\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; +20\tensembl\ttranscript\t30285705\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; +20\tensembl\texon\t30285705\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991635\"; exon_version \"1\"; +20\tensembl\tCDS\t30285705\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; +20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\"; +20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; +20\tensembl\texon\t30298823\t30298913\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991636\"; exon_version \"1\"; +20\tensembl\tCDS\t30298823\t30298913\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; +20\tensembl\texon\t30300869\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991637\"; exon_version \"1\"; +20\tensembl\tCDS\t30300869\t30300924\t.\t+\t2\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\"; ", ENST00000252021 => "#!genome-version NCBI33 diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/attrib_type.txt b/modules/t/test-genome-DBs/homo_sapiens/core/attrib_type.txt index f759011304d4dd87424580b6d3056426d47da540..5f90a270893c236f703383204dad15cb2679c526 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/attrib_type.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/core/attrib_type.txt @@ -17,3 +17,5 @@ 17 codon_table Codon Table Alternate codon table 18 non_ref Non Reference Non Reference Sequence Region 19 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank +20 proj_parent_g projection parent gene Stable identifier of the parent gene this gene was projected from (projection between different species and/or assemblies). +21 proj_parent_t projection parent transcript Stable identifier of the parent transcript this transcript was projected from (projection between different species and/or assemblies). diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/gene_attrib.txt b/modules/t/test-genome-DBs/homo_sapiens/core/gene_attrib.txt index 20d48d3f602f5bf10340fd31783fc15c3033aa48..1ee5e9aeac8f80c935838aa997fa35b2d706f52b 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/gene_attrib.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/core/gene_attrib.txt @@ -1,2 +1,3 @@ 18256 10 This is a note on gene 18256 18259 10 Another note on gene 18259 +18256 20 ENSG_PARENT_GENE diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/transcript_attrib.txt b/modules/t/test-genome-DBs/homo_sapiens/core/transcript_attrib.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fe889b722f1a79bff021c6a16a2ab6acdeff486 --- /dev/null +++ b/modules/t/test-genome-DBs/homo_sapiens/core/transcript_attrib.txt @@ -0,0 +1 @@ +21716 21 ENSG_PARENT_TRANSCRIPT