Skip to content
Snippets Groups Projects
Commit 64bdf0ea authored by Matthew Laird, committed by GitHub
Browse files

Merge pull request #165 from Ensembl/ENSCORESW-2003

Updates to include projection parent id in GTF dumps - attribute code…
parents f547798a 229316af
No related branches found
No related tags found
No related merge requests found
......@@ -457,6 +457,13 @@ sub _print_attribs {
print $fh " havana_gene_version \"" . $gene->havana_gene->version() . "\";";
}
#add projection parent
my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_g");
if (@{$proj_parent_attributes}) {
my $value = $proj_parent_attributes->[0]->value;
print $fh qq{ projection_parent_gene "${value}";};
}
if($type ne 'gene') {
print $fh " transcript_name \"" . $trans_name . "\";"
if ($trans_name);
......@@ -509,6 +516,11 @@ sub _print_attribs {
$value =~ s/tsl//;
print $fh qq{ transcript_support_level "${value}";};
}
my $proj_parent_attributes = $transcript->get_all_Attributes("proj_parent_t");
if (@{$proj_parent_attributes}) {
my $value = $proj_parent_attributes->[0]->value;
print $fh qq{ projection_parent_transcript "${value}";};
}
}
return;
......
......@@ -55,6 +55,14 @@ my $db = $mtdb->get_DBAdaptor("core");
$transcript->translation($translation);
my $gene = Bio::EnsEMBL::Gene->new(-TRANSCRIPTS => [$transcript], -STABLE_ID => 'GENE', -BIOTYPE => 'protein_coding');
$transcript->add_DBEntry(Bio::EnsEMBL::DBEntry->new(-PRIMARY_ID => 'CCDS.1', -DBNAME => 'CCDS'));
#add a gene attribute
my $attrib_g = Bio::EnsEMBL::Attribute->new(-CODE => 'proj_parent_g', -NAME => 'projection parent gene', -DESCRIPTION => 'Stable identifier of the parent gene', -VALUE =>'ENSG_PARENT_GENE');
$gene->add_Attributes($attrib_g);
#add a transcript attribute
my $attrib_t = Bio::EnsEMBL::Attribute->new (-CODE => 'proj_parent_t', -NAME => 'projection parent transcript', -DESCRIPTION => 'Stable identifier of the parent transcript', -VALUE =>'ENST_PARENT_TRANSCRIPT');
$transcript->add_Attributes($attrib_t);
# Stupid transcript code has a cache per DB if using it.
$transcript->{dbentriesCCDS} = $transcript->{dbentries};
......@@ -63,15 +71,15 @@ my $db = $mtdb->get_DBAdaptor("core");
my $gtf_serializer = Bio::EnsEMBL::Utils::IO::GTFSerializer->new($fh);
$gtf_serializer->print_Gene($gene);
my $gtf = <<GTF;
20\tensembl\tgene\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\";
20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\";
20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\";
20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\";
20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\";
20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\";
20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\";
20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\";
20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\";
20\tensembl\tgene\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; projection_parent_gene \"ENSG_PARENT_GENE\";
20\tensembl\ttranscript\t30274331\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tSelenocysteine\t30274337\t30274339\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274331\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tCDS\t30274334\t30274345\t.\t+\t0\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"1\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; protein_id \"PEP\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\texon\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; exon_number \"2\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; exon_id \"e2\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tfive_prime_utr\t30274331\t30274333\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274346\t30274348\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
20\tensembl\tthree_prime_utr\t30274401\t30274404\t.\t+\t.\tgene_id \"GENE\"; transcript_id \"TRANS\"; gene_source \"ensembl\"; gene_biotype \"protein_coding\"; transcript_source \"ensembl\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS.1\"; tag \"seleno\"; projection_parent_transcript "ENST_PARENT_TRANSCRIPT";
GTF
eq_or_diff(${$fh->string_ref}, $gtf, 'Checking custom Gene object dumps UTRs, Selenocysteine, seleno tag and CCDS');
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment