Commit 428d8859 authored by premanand17
Browse files

Updates to add projection parent as attributes in gtf and gff dumps - ENSCORESW-2003

parent be61b7c2
...@@ -1524,6 +1524,12 @@ sub summary_as_hash { ...@@ -1524,6 +1524,12 @@ sub summary_as_hash {
my $havana_gene = $self->havana_gene(); my $havana_gene = $self->havana_gene();
$summary_ref->{'havana_gene'} = $havana_gene->display_id() if defined $havana_gene; $summary_ref->{'havana_gene'} = $havana_gene->display_id() if defined $havana_gene;
$summary_ref->{'havana_version'} = $havana_gene->version() if defined $havana_gene; $summary_ref->{'havana_version'} = $havana_gene->version() if defined $havana_gene;
## Stable identifier of the parent gene this gene was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_gene'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref; return $summary_ref;
} }
......
...@@ -3139,6 +3139,12 @@ sub summary_as_hash { ...@@ -3139,6 +3139,12 @@ sub summary_as_hash {
$summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds(); $summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds();
$summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl(); $summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl();
$summary_ref->{'tag'} = 'basic' if $self->gencode_basic(); $summary_ref->{'tag'} = 'basic' if $self->gencode_basic();
## Stable identifier of the parent transcript this transcript was projected from
my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_t");
if (@{$proj_parent_attributes}) {
$summary_ref->{'projection_parent_transcript'} = $proj_parent_attributes->[0]->value;
}
return $summary_ref; return $summary_ref;
} }
......
...@@ -458,7 +458,7 @@ sub _print_attribs { ...@@ -458,7 +458,7 @@ sub _print_attribs {
} }
#add projection parent #add projection parent
my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_g"); my $proj_parent_attributes = $gene->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) { if (@{$proj_parent_attributes}) {
my $value = $proj_parent_attributes->[0]->value; my $value = $proj_parent_attributes->[0]->value;
print $fh qq{ projection_parent_gene "${value}";}; print $fh qq{ projection_parent_gene "${value}";};
......
...@@ -76,7 +76,7 @@ OUT ...@@ -76,7 +76,7 @@ OUT
#Have to do this outside of the HERETO thanks to tabs #Have to do this outside of the HERETO thanks to tabs
$expected .= join("\t", $expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./, qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;version=1' 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
); );
$expected .= "\n"; $expected .= "\n";
...@@ -122,7 +122,7 @@ OUT ...@@ -122,7 +122,7 @@ OUT
#Have to do this outside of the HERETO thanks to tabs #Have to do this outside of the HERETO thanks to tabs
$expected .= join("\t", $expected .= join("\t",
qw/20 wibble region 30274334 30300924 . + ./, qw/20 wibble region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1' 'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
); );
$expected .= "\n"; $expected .= "\n";
assert_gff3($gene, $expected, 'Gene with custom source serialises to GFF3 as expected. Source is wibble'); assert_gff3($gene, $expected, 'Gene with custom source serialises to GFF3 as expected. Source is wibble');
...@@ -132,7 +132,7 @@ OUT ...@@ -132,7 +132,7 @@ OUT
OUT OUT
$expected .= join("\t", $expected .= join("\t",
qw/20 ensembl region 30274334 30298904 . + ./, qw/20 ensembl region 30274334 30298904 . + ./,
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;transcript_id=ENST00000310998;version=1' 'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;projection_parent_transcript=ENSG_PARENT_TRANSCRIPT;transcript_id=ENST00000310998;version=1'
); );
$expected .= "\n"; $expected .= "\n";
assert_gff3($gene->canonical_transcript(), $expected, 'Transcript with custom source serialises to GFF3 as expected. Source is wibble'); assert_gff3($gene->canonical_transcript(), $expected, 'Transcript with custom source serialises to GFF3 as expected. Source is wibble');
...@@ -206,7 +206,7 @@ OUT ...@@ -206,7 +206,7 @@ OUT
#Have to do this outside of the HERETO thanks to tabs #Have to do this outside of the HERETO thanks to tabs
$expected .= join("\t", $expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./, qw/20 ensembl region 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;version=1' 'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
); );
$expected .= "\n"; $expected .= "\n";
......
This diff is collapsed.
...@@ -17,3 +17,5 @@ ...@@ -17,3 +17,5 @@
17 codon_table Codon Table Alternate codon table 17 codon_table Codon Table Alternate codon table
18 non_ref Non Reference Non Reference Sequence Region 18 non_ref Non Reference Non Reference Sequence Region
19 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank 19 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank
20 proj_parent_g projection parent gene Stable identifier of the parent gene this gene was projected from (projection between different species and/or assemblies).
21 proj_parent_t projection parent transcript Stable identifier of the parent transcript this transcript was projected from (projection between different species and/or assemblies).
18256 10 This is a note on gene 18256 18256 10 This is a note on gene 18256
18259 10 Another note on gene 18259 18259 10 Another note on gene 18259
18256 20 ENSG_PARENT_GENE
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment