Commit 9a505572 authored by Tiago Grego
Browse files

updated gffSerialiser tests

parent 504625ad
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## no critic (RequireFilenameMatchesPackage)
package Test::SO::Term;
use strict;
use warnings;

# Minimal stand-in for an ontology term object, used only by this test file.

# Constructor. May be invoked on the class name or on an existing instance;
# either way it hands back a fresh, empty hash-based object of that class.
sub new {
    my $invocant = shift;
    my $target_class = ref($invocant) || $invocant;
    return bless {}, $target_class;
}

# Term name accessor. The value is fixed: every term reports 'region'.
sub name {
    return 'region';
}
package Test::SO;
use base qw/Bio::EnsEMBL::DBSQL::OntologyTermAdaptor/;

# Test double that subclasses the ontology term adaptor. It is built as a
# bare empty hash, so no parent-class constructor is run.

# Constructor. Accepts a class name or an instance as the invocant and
# returns a new, empty object blessed into the same class.
sub new {
    my $invocant = shift;
    my $target_class = ref($invocant) || $invocant;
    return bless {}, $target_class;
}

# Lookup stub: whatever arguments are supplied are ignored, and a new
# Test::SO::Term is returned on every call.
sub fetch_by_accession {
    my $term = Test::SO::Term->new();
    return $term;
}
package main;
use strict;
use warnings;
use Test::More;
use Test::Warnings;
use Test::Warnings qw( warning );
use Test::Differences qw( eq_or_diff );
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Utils::IO::GFFSerializer;
use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::Slice;
use IO::String;
use Test::Differences;
use FindBin qw/$Bin/;
# Test database setup shared by the tests in this file.
# NOTE(review): $db is declared twice in a row with different constructor
# arguments; the second declaration masks the first (Perl warns about this,
# and the first MultiTestDB object is discarded). This looks like leftover
# from a merge/diff - confirm which form is intended and delete the other.
my $db = Bio::EnsEMBL::Test::MultiTestDB->new(undef, "$Bin/..");
my $db = Bio::EnsEMBL::Test::MultiTestDB->new();
# Adaptor for the 'core' database, taken from whichever $db is in effect.
my $dba = $db->get_DBAdaptor('core');
# Separate test database handle and adaptor for the 'ontology' database.
my $omulti = Bio::EnsEMBL::Test::MultiTestDB->new('ontology', "$Bin/..");
my $odb = $omulti->get_DBAdaptor('ontology');
# Stable ID of the gene used by the tests.
my $id = 'ENSG00000131044';
......@@ -75,8 +40,8 @@ my $ga = $dba->get_GeneAdaptor();
##sequence-region 20 30274334 30300924
OUT
#Have to do this outside of the heredoc thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
$expected .= join("\t",
qw/20 ensembl gene 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......@@ -103,13 +68,13 @@ OUT
##sequence-region wibble 1 10
OUT
#Have to do this outside of the heredoc thanks to tabs
$expected .= join("\t",
$expected .= join("\t",
qw/wibble . region 1 10 . + ./,
''
''
);
$expected .= "\n";
assert_gff3($feature, $expected, 'Default feature should seralise without attributes but leave a trailing \t');
assert_gff3($feature, $expected, 'Default feature should serialise without attributes but leave a trailing \t');
}
......@@ -121,8 +86,8 @@ OUT
##sequence-region 20 30274334 30300924
OUT
#Have to do this outside of the heredoc thanks to tabs
$expected .= join("\t",
qw/20 wibble region 30274334 30300924 . + ./,
$expected .= join("\t",
qw/20 wibble gene 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......@@ -131,8 +96,8 @@ OUT
##gff-version 3
##sequence-region 20 30274334 30298904
OUT
$expected .= join("\t",
qw/20 ensembl region 30274334 30298904 . + ./,
$expected .= join("\t",
qw/20 ensembl mRNA 30274334 30298904 . + ./,
'ID=transcript:ENST00000310998;Name=C20orf125;Parent=gene:ENSG00000131044;biotype=protein_coding;logic_name=ensembl;projection_parent_transcript=ENSG_PARENT_TRANSCRIPT;transcript_id=ENST00000310998;version=1'
);
$expected .= "\n";
......@@ -143,8 +108,8 @@ OUT
##sequence-region 20 30274334 30274425
OUT
$expected .= join("\t",
qw/20 ensembl region 30274334 30274425 . + 0/,
'ID=region:ENSP00000308980;Parent=transcript:ENST00000310998;protein_id=ENSP00000308980'
qw/20 ensembl CDS 30274334 30274425 . + 0/,
'ID=CDS:ENSP00000308980;Parent=transcript:ENST00000310998;protein_id=ENSP00000308980'
);
$expected .= "\n";
my $cds = $gene->canonical_transcript->get_all_CDS();
......@@ -157,7 +122,7 @@ OUT
##sequence-region 20 30274334 30274425
OUT
$expected .= join("\t",
qw/20 ensembl region 30274334 30274425 . + ./,
qw/20 ensembl exon 30274334 30274425 . + ./,
'Name=ENSE00001155821;Parent=transcript:ENST00000310998;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001155821;rank=1;version=1'
);
$expected .= "\n";
......@@ -172,14 +137,14 @@ OUT
##sequence-region 20 30583501 30583588
OUT
$expected .= join("\t",
qw/20 ensembl region 30583501 30583588 . - ./,
qw/20 ensembl five_prime_UTR 30583501 30583588 . - ./,
'Parent=transcript:ENST00000246229'
);
$expected .= "\n";
assert_gff3($utrs->[0], $expected, 'UTR feature serialises to GFF3 as expected');
$cds_expected .= join("\t",
qw/20 ensembl region 30274334 30274425 . + ./,
qw/20 ensembl exon 30274334 30274425 . + ./,
'Name=ENSE00001155821;Parent=transcript:ENST00000310998;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001155821;rank=1;version=1'
);
$cds_expected .= "\n";
......@@ -193,20 +158,20 @@ OUT
{
my $gene = $ga->fetch_by_stable_id($id);
my $summary = $gene->summary_as_hash;
$$summary{'Dbxref'} = ['bibble', 'fibble'];
$$summary{'Ontology_term'} = 'GO:0001612';
local undef &{Bio::EnsEMBL::Gene::summary_as_hash};
local *{Bio::EnsEMBL::Gene::summary_as_hash} = sub {return $summary};
my $expected = <<'OUT';
##gff-version 3
##sequence-region 20 30274334 30300924
OUT
#Have to do this outside of the heredoc thanks to tabs
$expected .= join("\t",
qw/20 ensembl region 30274334 30300924 . + ./,
$expected .= join("\t",
qw/20 ensembl gene 30274334 30300924 . + ./,
'ID=gene:ENSG00000131044;Name=C20orf125;Dbxref=bibble,fibble;Ontology_term=GO:0001612;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];gene_id=ENSG00000131044;logic_name=ensembl;projection_parent_gene=ENSG_PARENT_GENE;version=1'
);
$expected .= "\n";
......@@ -214,21 +179,37 @@ OUT
assert_gff3($gene, $expected, 'Gene with array- and string-valued attributes as expected.');
}
{
    # The two-argument constructor form (ontology adaptor first, filehandle
    # second) is expected to emit a warning; capture it and check its text.
    my $out_fh = IO::String->new();
    my $legacy_adaptor = bless({}, 'Bio::EnsEMBL::DBSQL::OntologyTermAdaptor');

    my $captured = warning {
        my $serializer = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($legacy_adaptor, $out_fh)
    };

    my $matched = like(
        $captured,
        qr/GFF format does not require an instance of Bio::EnsEMBL::DBSQL::OntologyTermAdaptor anymore./,
        "Legacy constructor throws warning",
    );
    # On failure, dump whatever was actually captured to aid debugging.
    diag 'Got warning: ', explain($captured) unless $matched;
}
# Serialises one feature to GFF3 in memory and compares the result with the
# expected text.
#
# Arguments:
#   $feature  - feature object to serialise; its feature_Slice() is used to
#               print the main header
#   $expected - the exact GFF3 text expected (header lines plus feature line)
#   $msg      - test description passed through to eq_or_diff
#
# Returns whatever eq_or_diff returns.
sub assert_gff3 {
my ($feature, $expected, $msg) = @_;
my $fh = IO::String->new();
# Build the serializer exactly once, from the filehandle alone. The previous
# body declared $ser twice (the second masking the first) and first built it
# with the legacy (ontology adaptor, filehandle) signature, which this file's
# own legacy-constructor test shows emits a warning.
my $ser = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($fh);
$ser->print_main_header([$feature->feature_Slice()]);
$ser->print_feature($feature);
return eq_or_diff(${$fh->string_ref()}, $expected, $msg);
}
sub assert_gff3_list {
my ($features, $expected, $msg) = @_;
my $ota = Test::SO->new();
my $fh = IO::String->new();
my $ser = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($ota, $fh);
my $ser = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($fh);
$ser->print_main_header([$features->[0]->feature_Slice()]);
$ser->print_feature_list($features);
eq_or_diff(${$fh->string_ref()}, $expected, $msg);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment