Unverified Commit 85a3c136 authored by Tiago Grego's avatar Tiago Grego Committed by GitHub
Browse files

Merge pull request #68 from Ensembl/unification_try2

Merging of unified-design with master branches
parents 0d9284f7 a6a11141
blib/
.build/
_build/
cover_db/
inc/
Build
!Build/
Build.bat
.last_cover_stats
MANIFEST.bak
META.yml
MYMETA.yml
nytprof.out
pm_to_blib
.DS_Store
Thumbs.db
*.swp
*.swo
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
a.out
*.o
*.obj
*.class
MultiTestDB.conf
*.frozen.conf
bioperl*.zip
/modules/t/CLEAN.t
/misc-scripts/xref_mapping/sql/populate_metadata.sql
......@@ -58,11 +58,12 @@ install:
- cpanm Bio::DB::HTS
- cpanm -n Devel::Cover::Report::Coveralls
- cpanm -n DBD::SQLite
- cpanm JSON
- cpanm URI::Escape
- mysql -u root -h localhost -e 'GRANT ALL PRIVILEGES ON *.* TO "travis"@"%"'
# - cp travisci/MultiTestDB.conf.travisci.mysql modules/t/MultiTestDB.conf.mysql
- cp travisci/MultiTestDB.conf.travisci.mysql modules/t/MultiTestDB.conf
# - cp travisci/MultiTestDB.conf.travisci.SQLite modules/t/MultiTestDB.conf.SQLite
script: "./travisci/harness.sh"
# Get the matrix to only build coveralls support when on 5.10
......
......@@ -18,7 +18,9 @@ limitations under the License.
=cut
package Bio::EnsEMBL::IO::Adaptor::BAMAdaptor;
use strict;
use warnings;
use Bio::EnsEMBL::Feature;
use Data::Dumper;
......@@ -101,7 +103,7 @@ sub munge_chr_id {
my $bam = $self->bam_open;
warn "Failed to open BAM file " . $self->url unless $bam;
return undef unless $bam;
return unless $bam;
my $header = $bam->header;
......@@ -130,7 +132,7 @@ sub munge_chr_id {
}
warn " *** could not parse_region for BAM with $chr_id in file " . $self->url ."\n";
return undef;
return;
}
sub fetch_paired_alignments {
......
......@@ -18,8 +18,9 @@ limitations under the License.
=cut
package Bio::EnsEMBL::IO::Adaptor::BigBedAdaptor;
use strict;
use strict;
use warnings;
#########################################################################
# DEPRECATED MODULE - please use Bio::EnsEMBL::IO::Parser::BigBed instead
......@@ -130,7 +131,7 @@ sub munge_chr_id {
warn "Failed to open BigBed file " . $self->url unless $bb;
return undef unless $bb;
return unless $bb;
my $list = $bb->chromList;
my $head = $list->head;
......
......@@ -18,7 +18,9 @@ limitations under the License.
=cut
package Bio::EnsEMBL::IO::Adaptor::BigWigAdaptor;
use strict;
use warnings;
#########################################################################
# DEPRECATED MODULE - please use Bio::EnsEMBL::IO::Parser::BigWig instead
......@@ -77,7 +79,7 @@ sub munge_chr_id {
warn "Failed to open BigWig file " . $self->url unless $bw;
return undef unless $bw;
return unless $bw;
my $list = $bw->chromList;
my $head = $list->head;
......
......@@ -18,7 +18,9 @@ limitations under the License.
=cut
package Bio::EnsEMBL::IO::Adaptor::HTSAdaptor;
use strict;
use warnings;
use Bio::EnsEMBL::Feature;
use Data::Dumper;
......@@ -107,7 +109,7 @@ sub munge_chr_id {
}
warn " *** could not parse_region for BAM/CRAM with $chr_id in file " . $self->url ."\n";
return undef;
return;
}
sub fetch_paired_alignments {
......
......@@ -23,8 +23,8 @@ package Bio::EnsEMBL::IO::Adaptor::PairwiseAdaptor;
# No longer in use by the webcode; will be removed in November 2016
####################################################################
use strict;
use warnings;
use Bio::EnsEMBL::IO::Parser::PairwiseTabix;
......
......@@ -18,7 +18,9 @@ limitations under the License.
=cut
package Bio::EnsEMBL::IO::Adaptor::VCFAdaptor;
use strict;
use warnings;
use EnsEMBL::Web::Utils::FormatText qw(date_format);
use File::Path qw(make_path);
......
......@@ -33,7 +33,7 @@ use Carp;
use Bio::EnsEMBL::IO::Object::ColumnBasedGeneric;
use base qw/Bio::EnsEMBL::IO::TextParser/;
use parent qw/Bio::EnsEMBL::IO::TextParser/;
our %sub_strings = (
'\n' => "\n",
......@@ -312,7 +312,7 @@ sub validate_metadata {
## TODO - implement validation!
}
return undef;
return;
}
=head2 validate_record
......
......@@ -50,7 +50,7 @@ package Bio::EnsEMBL::IO::Format;
use strict;
use warnings;
use Bio::EnsEMBL::IO::NamedColours;
use Bio::EnsEMBL::Utils::NamedColours;
=head2 new
......@@ -105,7 +105,7 @@ sub extensions {
=head2 delimiter
Description : getter for delimiter (if format has one)
Description : getter for default delimiter (if format has one)
Returntype : String
=cut
......@@ -115,6 +115,30 @@ sub delimiter {
return $self->{'delimiter'};
}
=head2 delimiter_regex
Description : Regex for validating delimiter(s)
Returntype : String
=cut
# Getter for the pattern string stored under 'delimiter_regex' on this
# format object. Returns the stored value unchanged (undef when unset).
sub delimiter_regex {
  my ($self) = @_;
  my $pattern = $self->{'delimiter_regex'};
  return $pattern;
}
=head2 empty_column
Description : getter for value used in empty columns
Returntype : String
=cut
# Getter for the placeholder written into empty columns.
# Falls back to the empty string when the stored value is unset or false.
sub empty_column {
  my ($self) = @_;
  my $placeholder = $self->{'empty_column'};
  return $placeholder ? $placeholder : '';
}
=head2 can_multitrack
Description : getter for multitrack flag
......@@ -241,13 +265,33 @@ sub get_value_for_field {
return $info->{$value};
}
=head2 get_accessors
Description : get array of Ensembl accessor names instead of the official field names
Returntype : Arrayref
=cut
# Build the list of accessor names for this format's fields, in field order.
# For each name returned by get_field_order, the 'accessor' entry from
# get_field_info is used when present and true; otherwise the field name
# itself is used.
# Returns an arrayref of names.
sub get_accessors {
  my $self  = shift;
  my $info  = $self->get_field_info;
  my $order = $self->get_field_order;

  my @accessors;
  foreach my $field (@$order) {
    # Look the field up in two steps: $info->{$field}{'accessor'} would
    # autovivify an empty entry in the field info hash for any ordered
    # field that has no info of its own.
    my $spec = $info->{$field};
    push @accessors, ($spec && $spec->{'accessor'}) || $field;
  }
  return \@accessors;
}
########## VALIDATION METHODS #################
=head2 validate_as
Description : wrapper around more specific validators, for easy processing
Args : Errors - hash of existing errors
: Type - validation type
Args : Type - validation type
: Value - value to be checked
: Match (optional) - a specific value or range to be compared against
Returntype : Boolean
......@@ -255,13 +299,13 @@ sub get_value_for_field {
=cut
# Dispatch a value to the validator method named after $type.
#   $type  - validator suffix; 'integer' calls validate_as_integer, etc.
#   $value - value to be checked
#   $match - optional value or range passed through to the validator
# Returns whatever the validator returns, or 0 when $type is false,
# $value is undefined, or no validate_as_<type> method is available.
sub validate_as {
  my ($self, $type, $value, $match) = @_;
  return 0 unless ($type && defined($value));

  my $method = 'validate_as_'.$type;
  return 0 unless $self->can($method);
  return $self->$method($value, $match);
}
=head2 validate_as_boolean
......@@ -274,7 +318,7 @@ sub validate_as {
# Validator for boolean fields.
#   $value - value to be checked
# Returns 1 when $value is exactly 0 or 1, otherwise 0.
sub validate_as_boolean {
  my ($self, $value) = @_;
  # Compare as text: a numeric == would treat any non-numeric string
  # (e.g. 'abc') as 0 and accept it.
  return $value =~ /^[01]$/ ? 1 : 0;
}
=head2 validate_as_string
......@@ -289,14 +333,12 @@ sub validate_as_boolean {
# Validator for string fields.
#   $value - value to be checked
#   $match - optional exact string that $value must equal
# With a true $match, returns 1 when $value eq $match, else 0.
# Without one, returns 1 when $value contains at least one printable
# character, else 0.
sub validate_as_string {
  my ($self, $value, $match) = @_;
  if ($match) {
    return $value eq $match ? 1 : 0;
  }
  return $value =~ /[[:print:]]/ ? 1 : 0;
}
=head2 validate_as_integer
......@@ -309,7 +351,7 @@ sub validate_as_string {
# Validator for integer fields.
#   $value - value to be checked
# Returns 1 when $value is an optionally negative run of digits, else 0.
sub validate_as_integer {
  my ($self, $value) = @_;
  return $value =~ /^-?\d+$/ ? 1 : 0;
}
=head2 validate_as_floating_point
......@@ -322,7 +364,7 @@ sub validate_as_integer {
# Validator for floating point fields.
#   $value - value to be checked
# Returns 1 for an optionally signed decimal number, with or without a
# fractional part and with an optional exponent (e.g. 12, -3.5, 1e-10),
# otherwise 0.
sub validate_as_floating_point {
  my ($self, $value) = @_;
  # The exponent part is accepted so that scientific notation, as used
  # for very small or very large scores, is not rejected.
  return $value =~ /^[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?$/ ? 1 : 0;
}
=head2 validate_as_range
......@@ -336,9 +378,9 @@ sub validate_as_floating_point {
# Validator for numeric fields that must fall within a closed range.
#   $value - value to be checked
#   $match - arrayref of [min, max]
# Returns 1 when min <= $value <= max; returns 0 when out of range or
# when $match is missing or not an array reference.
sub validate_as_range {
  my ($self, $value, $match) = @_;
  return 0 unless ($match && ref $match eq 'ARRAY');

  my ($min, $max) = @$match;
  return ($value >= $min && $value <= $max) ? 1 : 0;
}
=head2 validate_as_comma_separated
......@@ -352,7 +394,7 @@ sub validate_as_range {
# Validator for comma-separated lists of word-character tokens.
#   $value - value to be checked
# Returns 1 when $value is one or more \w+ tokens joined by single
# commas (a single trailing comma is tolerated), otherwise 0.
sub validate_as_comma_separated {
  my ($self, $value) = @_;
  # Written without a nested quantifier: the equivalent /^(\w+,?)+$/
  # can backtrack exponentially on long non-matching input.
  return $value =~ /^\w+(?:,\w+)*,?$/ ? 1 : 0;
}
=head2 validate_as_case_insensitive
......@@ -366,8 +408,8 @@ sub validate_as_comma_separated {
# Validator for fields compared against $match without regard to case.
#   $value - value to be checked
#   $match - pattern to look for; it is interpolated into a regular
#            expression and is not anchored
# Returns 1 when $value matches $match case-insensitively; returns 0
# when it does not or when $match is false.
sub validate_as_case_insensitive {
  my ($self, $value, $match) = @_;
  return 0 unless $match;
  return $value =~ /$match/i ? 1 : 0;
}
=head2 validate_as_strand_integer
......@@ -380,11 +422,11 @@ sub validate_as_case_insensitive {
# Validator for strand fields expressed as an integer.
#   $value - value to be checked
# Returns 1 when $value is exactly 0, 1 or -1, otherwise 0.
sub validate_as_strand_integer {
  my ($self, $value) = @_;
  # The alternatives are grouped so that both anchors apply to each of
  # them; ungrouped, /^0|1|-1$/ accepts anything that merely contains 1.
  return $value =~ /^(?:0|1|-1)$/ ? 1 : 0;
}
=head2 validate_as_strand_plusminus
Description : Validator for fields that should contain a strand as + or -
Description : Validator for fields that should contain a strand as one of + - ? .
Args : Type - validation type
: Value - value to be checked
Returntype : Boolean
......@@ -392,9 +434,23 @@ sub validate_as_strand_integer {
# Validator for strand fields expressed as a symbol.
#   $value - value to be checked
# Returns 1 when $value is exactly one of + - ? . and 0 otherwise.
sub validate_as_strand_plusminus {
  my ($self, $value) = @_;
  # A character class keeps both anchors attached to every symbol;
  # /^\+|-|\?|\.$/ would accept any value that merely contains - or ?.
  return $value =~ /^[-+?.]$/ ? 1 : 0;
}
=head2 validate_as_phase
Description : Validator for fields that should contain a coding phase, i.e. 0, 1 or 2
Args : Value - value to be checked
Returntype : Boolean
=cut
# Validator for coding phase fields.
#   $value - value to be checked
# Returns 1 when $value is exactly 0, 1 or 2, otherwise 0.
sub validate_as_phase {
  my ($self, $value) = @_;
  # A character class keeps both anchors attached to every digit;
  # /^0|1|2$/ would accept values such as 10 or 31.
  return $value =~ /^[012]$/ ? 1 : 0;
}
=head2 validate_as_rgb_string
Description : Validator for fields that should contain an RGB colour as a comma-separated string
......@@ -407,9 +463,9 @@ sub validate_as_rgb_string {
my ($self, $value) = @_;
## Technically 0 is not a valid colour, but it's used instead of '.' in some UCSC examples
return undef if $value eq '0';
return 1 if $value == 0;
return $value =~ /^(\d){1,3},(\d){1,3},(\d){1,3}$/ ? undef : "Value $value is not an RGB string";
return $value =~ /^(\d){1,3},(\d){1,3},(\d){1,3}$/ ? 1 : 0;
}
=head2 validate_as_colour
......@@ -423,22 +479,22 @@ sub validate_as_colour {
my ($self, $value) = @_;
## Technically 0 is not a valid colour, but it's used instead of '.' in some UCSC examples
return undef if $value eq 0;
return 1 if $value == 0;
## Try RGB first, as that's most usual
my $error = $self->validate_as_rgb_string($value);
my $valid = $self->validate_as_rgb_string($value);
## If not, how about web-friendly hex colours, e.g. #ffcc00?
if ($error) {
$error = undef if ($value =~ /^#?[A-Fa-f0-9]{3}/ || $value =~ /^#?[A-Fa-f0-9]{6}/);
unless ($valid) {
$valid = 1 if ($value =~ /^#?[A-Fa-f0-9]{3}/ || $value =~ /^#?[A-Fa-f0-9]{6}/);
}
## Fall back to checking Unix named colours
if ($error) {
my $lookup = Bio::EnsEMBL::IO::NamedColours::named_colours;
$error = $lookup->{$value} ? undef : "Value $value is not a named colour";
unless ($valid) {
my $lookup = named_colours();
$valid = 1 if $lookup->{$value};
}
return $error;
return $valid;
}
=head2 validate_as_sequence
......@@ -451,7 +507,7 @@ sub validate_as_colour {
# Validator for nucleic acid or protein sequence fields.
#   $value - value to be checked
# Returns 1 when $value is a non-empty run of the letters
# ACDEFGHIKLMNPQRSTUVWY (in either case), otherwise 0.
sub validate_as_sequence {
  my ($self, $value) = @_;
  return $value =~ /^[ACDEFGHIKLMNPQRSTUVWY]+$/i ? 1 : 0;
}
=head2 validate_as_dna_sequence
......@@ -464,7 +520,7 @@ sub validate_as_sequence {
# Validator for DNA sequence fields.
#   $value - value to be checked
# Returns 1 when $value is a non-empty run of the letters A, C, G, T
# or N (in either case), otherwise 0.
sub validate_as_dna_sequence {
  my ($self, $value) = @_;
  return $value =~ /^[ACGTN]+$/i ? 1 : 0;
}
1;
......@@ -40,7 +40,9 @@ sub new {
my $self = {
'name' => 'Bed',
'extensions' => ['bed'],
'delimiter' => '\t|\s',
'delimiter' => "\t",
'delimiter_regex' => '\t|\s+',
'empty_column' => '.',
'can_multitrack' => 1,
'can_metadata' => -1,
'metadata_info' => {
......@@ -79,14 +81,17 @@ sub new {
'chrom' => {
'validate_as' => 'string',
'optional' => 0,
'accessor' => 'seqname',
},
'chromStart' => {
'validate_as' => 'integer',
'optional' => 0,
'accessor' => 'bedstart',
},
'chromEnd' => {
'validate_as' => 'integer',
'optional' => 0,
'accessor' => 'end',
},
'name' => {
'validate_as' => 'string',
......
=pod
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=head1 NAME
Bio::EnsEMBL::IO::Format::GFF3 - an object that defines and validates GFF3-based formats
=cut
package Bio::EnsEMBL::IO::Format::GFF3;
use strict;
use warnings;
use parent qw(Bio::EnsEMBL::IO::Format);
my %strand_mapping = (1 => '+', -1 => '-');
# Constructor: builds the static description of the GFF3 format (name,
# file extensions, delimiters, empty-column placeholder, metadata and
# column definitions) and blesses it into $class.
sub new {
  my ($class) = @_;

  # Track-level metadata entries; every one of them is optional
  my %metadata_info = (
    'name'        => { 'optional' => 1, 'validate_as' => 'string', 'unique' => 1 },
    'description' => { 'optional' => 1, 'validate_as' => 'string' },
    'priority'    => { 'optional' => 1, 'validate_as' => 'integer' },
  );

  # The nine columns, each mandatory, paired with its validator type
  my @columns = (
    [ 'seqid'      => 'string' ],
    [ 'source'     => 'string' ],
    [ 'type'       => 'string' ],
    [ 'start'      => 'integer' ],
    [ 'end'        => 'integer' ],
    [ 'score'      => 'floating_point' ],
    [ 'strand'     => 'strand_plusminus' ],
    [ 'phase'      => 'phase' ],
    [ 'attributes' => 'string' ],
  );
  my %field_info = map {
    $_->[0] => { 'validate_as' => $_->[1], 'optional' => 0 }
  } @columns;

  # Only the first column is exposed under a different accessor name
  $field_info{'seqid'}{'accessor'} = 'seqname';

  my %format = (
    'name'            => 'GFF3',
    'extensions'      => ['gff', 'gff3'],
    'delimiter'       => "\t",
    'delimiter_regex' => '\t',
    'empty_column'    => '.',
    'can_multitrack'  => 0,
    'can_metadata'    => 1,
    'metadata_info'   => \%metadata_info,
    'field_info'      => \%field_info,
    # NOTE(review): the first entry is 'seqname' although field_info is
    # keyed by 'seqid' - confirm which name consumers of the field order
    # expect before changing either.
    'field_order'     => [qw(seqname source type start end score strand phase attributes)],
  );

  return bless \%format, $class;
}
=head2 strand_conversion
Description: Access the strand conversion mappings
=cut
# Accessor for the file-level strand mapping table.
# Returns a reference to %strand_mapping itself, not a copy.
sub strand_conversion {
  my ($self) = @_;
  return \%strand_mapping;
}
1;
=pod
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.