Commit 428d8859 authored by premanand17
Browse files

Updates to add projection parent as attributes in gtf and gff dumps - ENSCORESW-2003

parent be61b7c2
......@@ -1524,6 +1524,12 @@ sub summary_as_hash {
my $havana_gene = $self->havana_gene();
$summary_ref->{'havana_gene'} = $havana_gene->display_id() if defined $havana_gene;
$summary_ref->{'havana_version'} = $havana_gene->version() if defined $havana_gene;
## Stable identifier of the parent gene this gene was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_gene'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref;
}
......
......@@ -3139,6 +3139,12 @@ sub summary_as_hash {
$summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds();
$summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl();
$summary_ref->{'tag'} = 'basic' if $self->gencode_basic();
## Stable identifier of the parent transcript this transcript was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_t");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_transcript'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref;
}
......
......@@ -458,7 +458,7 @@ sub _print_attribs {
}
#add projection parent
my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_g");
my $proj_parent_attributes = $gene->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
my $value = $proj_parent_attributes->[0]->value;
print $fh qq{ projection_parent_gene "${value}";};
......
......@@ -76,7 +76,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......@@ -122,7 +122,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 wibble region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
assert_gff3($gene, $expected, 'Gene with custom source serialises to GFF3 as expected. Source is wibble');
......@@ -132,7 +132,7 @@ OUT
OUT
$expected .= join("\t",
qw/20 ensembl region 30274334 30298904 . + ./,
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;transcript_id=ENST00000310998;version=1'
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;projection_parent_transcript=ENSG_PARENT_TRANSCRIPT;transcript_id=ENST00000310998;version=1'
);
$expected .= "\n";
assert_gff3($gene->canonical_transcript(), $expected, 'Transcript with custom source serialises to GFF3 as expected. Source is wibble');
......@@ -206,7 +206,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......
......@@ -72,14 +72,14 @@ my $db = $mtdb->get_DBAdaptor("core");
$gtf_serializer->print_Gene($gene);
my $gtf = <<GTF;
20\tensembl\tgene\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\";
20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
GTF
eq_or_diff(${$fh->string_ref}, $gtf, 'Checking custom Gene object dumps UTRs, Selenocysteine, seleno tag and CCDS');
}
......@@ -88,31 +88,31 @@ GTF
my $transcripts_gtf =
{
ENST00000310998 => "#!genome-version NCBI33
20\tensembl\ttranscript\t30274334\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\";
20\tensembl\texon\t30274334\t30274425\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155821\"; exon_version \"1\";
20\tensembl\tCDS\t30274334\t30274425\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\texon\t30284451\t30284562\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000859878\"; exon_version \"1\";
20\tensembl\tCDS\t30284451\t30284562\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\texon\t30285597\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000661128\"; exon_version \"1\";
20\tensembl\tCDS\t30285597\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\";
20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\texon\t30296506\t30296579\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155786\"; exon_version \"1\";
20\tensembl\tCDS\t30296506\t30296579\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\texon\t30298823\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001180831\"; exon_version \"1\";
20\tensembl\tCDS\t30298823\t30298904\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\";
20\tensembl\ttranscript\t30274334\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30274334\t30274425\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155821\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30274334\t30274425\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30284451\t30284562\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000859878\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30284451\t30284562\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30285597\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000661128\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30285597\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30296506\t30296579\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001155786\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30296506\t30296579\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"5\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\texon\t30298823\t30298904\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00001180831\"; exon_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
20\tensembl\tCDS\t30298823\t30298904\t.\t+\t1\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000310998\"; transcript_version \"1\"; exon_number \"6\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"C20orf125\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000308980\"; protein_version \"1\"; projection_parent_transcript \"ENSG_PARENT_TRANSCRIPT\";
",
ENST00000278995 => "#!genome-version NCBI33
20\tensembl\ttranscript\t30285705\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\";
20\tensembl\texon\t30285705\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991635\"; exon_version \"1\";
20\tensembl\tCDS\t30285705\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\";
20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30298823\t30298913\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991636\"; exon_version \"1\";
20\tensembl\tCDS\t30298823\t30298913\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30300869\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991637\"; exon_version \"1\";
20\tensembl\tCDS\t30300869\t30300924\t.\t+\t2\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\ttranscript\t30285705\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\";
20\tensembl\texon\t30285705\t30285782\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991635\"; exon_version \"1\";
20\tensembl\tCDS\t30285705\t30285782\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30295724\t30295792\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991632\"; exon_version \"1\";
20\tensembl\tCDS\t30295724\t30295792\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30298823\t30298913\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991636\"; exon_version \"1\";
20\tensembl\tCDS\t30298823\t30298913\t.\t+\t0\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
20\tensembl\texon\t30300869\t30300924\t.\t+\t.\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000991637\"; exon_version \"1\";
20\tensembl\tCDS\t30300869\t30300924\t.\t+\t2\tgene_id \"ENSG00000131044\"; gene_version \"1\"; transcript_id \"ENST00000278995\"; transcript_version \"1\"; exon_number \"4\"; gene_name \"C20orf125\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\"; transcript_name \"Q9BR18\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000278995\"; protein_version \"1\";
",
ENST00000252021 => "#!genome-version NCBI33
......
......@@ -17,3 +17,5 @@
17 codon_table Codon Table Alternate codon table
18 non_ref Non Reference Non Reference Sequence Region
19 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank
20 proj_parent_g projection parent gene Stable identifier of the parent gene this gene was projected from (projection between different species and/or assemblies).
21 proj_parent_t projection parent transcript Stable identifier of the parent transcript this transcript was projected from (projection between different species and/or assemblies).
18256 10 This is a note on gene 18256
18259 10 Another note on gene 18259
18256 20 ENSG_PARENT_GENE
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment