Skip to content
Snippets Groups Projects
Commit 428d8859 authored by premanand17
Browse files

Updates to add projection parent as attributes in gtf and gff dumps - ENSCORESW-2003

parent be61b7c2
No related branches found
No related tags found
2 merge requests: !169 Updates to add projection parent as attributes in gtf and gff dumps -…, !169 Updates to add projection parent as attributes in gtf and gff dumps -…
......@@ -1524,6 +1524,12 @@ sub summary_as_hash {
my $havana_gene = $self->havana_gene();
$summary_ref->{'havana_gene'} = $havana_gene->display_id() if defined $havana_gene;
$summary_ref->{'havana_version'} = $havana_gene->version() if defined $havana_gene;
## Stable identifier of the parent gene this gene was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_gene'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref;
}
......
......@@ -3139,6 +3139,12 @@ sub summary_as_hash {
$summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds();
$summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl();
$summary_ref->{'tag'} = 'basic' if $self->gencode_basic();
## Stable identifier of the parent transcript this transcript was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_t");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_transcript'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref;
}
......
......@@ -458,7 +458,7 @@ sub _print_attribs {
}
#add projection parent
my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_g");
my $proj_parent_attributes = $gene->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
my $value = $proj_parent_attributes->[0]->value;
print $fh qq{ projection_parent_gene "${value}";};
......
......@@ -76,7 +76,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......@@ -122,7 +122,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 wibble region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
assert_gff3($gene, $expected, 'Gene with custom source serialises to GFF3 as expected. Source is wibble');
......@@ -132,7 +132,7 @@ OUT
OUT
$expected .= join("\t",
qw/20 ensembl region 30274334 30298904 . + ./,
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;transcript_id=ENST00000310998;version=1'
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;projection_parent_transcript=ENSG_PARENT_TRANSCRIPT;transcript_id=ENST00000310998;version=1'
);
$expected .= "\n";
assert_gff3($gene->canonical_transcript(), $expected, 'Transcript with custom source serialises to GFF3 as expected. Source is wibble');
......@@ -206,7 +206,7 @@ OUT
#Have to do this outside of the HEREDOC thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1'
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......
This diff is collapsed.
......@@ -17,3 +17,5 @@
17 codon_table Codon Table Alternate codon table
18 non_ref Non Reference Non Reference Sequence Region
19 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank
20 proj_parent_g projection parent gene Stable identifier of the parent gene this gene was projected from (projection between different species and/or assemblies).
21 proj_parent_t projection parent transcript Stable identifier of the parent transcript this transcript was projected from (projection between different species and/or assemblies).
18256 10 This is a note on gene 18256
18259 10 Another note on gene 18259
18256 20 ENSG_PARENT_GENE
21716 21 ENSG_PARENT_TRANSCRIPT
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment