Skip to content
Snippets Groups Projects
Commit c6fb4ef2 authored by Patrick Meidl's avatar Patrick Meidl
Browse files

added documentation

parent f972e12d
No related branches found
No related tags found
No related merge requests found
......@@ -2,15 +2,34 @@ package Bio::EnsEMBL::IdMapping::BaseObject;
=head1 NAME
Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects
=head1 SYNOPSIS
# this object isn't instantiated directly but rather extended
use Bio::EnsEMBL::IdMapping::BaseObject;
our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
=head1 DESCRIPTION
This is the base object for some of the objects used in the IdMapping
application. An object that extends BaseObject will have a ConfParser, Logger
and Cache object. BaseObject also implements some useful utility functions
related to file and db access.
This isn't very clean OO design but it's efficient and easy to use...
=head1 METHODS
new
get_filehandle
file_exists
fetch_value_from_db
dump_table_to_file
upload_file_into_table
logger
conf
cache
=head1 LICENCE
......@@ -40,12 +59,20 @@ use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
=head2 new
Arg[1] :
Example :
Description : constructor
Return type :
Exceptions :
Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
Arg [CACHE] : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object
Example : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new(
-LOGGER => $logger,
-CONF => $conf,
-CACHE => $cache
);
Description : Constructor
Return type : implementing subclass type
Exceptions : thrown on wrong or missing arguments
Caller : general
Status : At Risk
: under development
=cut
......@@ -55,15 +82,18 @@ sub new {
my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);
unless ($logger->isa('Bio::EnsEMBL::Utils::Logger')) {
unless ($logger and ref($logger) and
$logger->isa('Bio::EnsEMBL::Utils::Logger')) {
throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
}
unless ($conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
unless ($conf and ref($conf) and
$conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
}
unless ($cache->isa('Bio::EnsEMBL::IdMapping::Cache')) {
unless ($cache and ref($cache) and
$cache->isa('Bio::EnsEMBL::IdMapping::Cache')) {
throw("You must provide configuration as a Bio::EnsEMBL::IdMapping::Cache object.");
}
......@@ -79,6 +109,25 @@ sub new {
}
=head2 get_filehandle
Arg[1] : String $filename - filename for filehandle
Arg[2] : String $path_append - append subdirectory name to dumppath
Arg[3] : String $mode - filehandle mode (<|>|>>)
Example : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats',
'>');
print $fh "Stats:\n";
Description : Returns a filehandle to a file for reading or writing. The file
is qualified with the dumppath defined in the configuration and
an optional subdirectory name.
Return type : filehandle
Exceptions : thrown on missing filename
Caller : general
Status : At Risk
: under development
=cut
sub get_filehandle {
my $self = shift;
my $filename = shift;
......@@ -99,6 +148,22 @@ sub get_filehandle {
}
=head2 file_exists
Arg[1] : String $filename - filename to test
Arg[2] : Boolean $path_append - turn on pre-pending of dumppath
Example : unless ($object->file_exists('gene_mappings.ser', 1)) {
$object->do_gene_mapping;
}
Description : Tests if a file exists and has non-zero size.
Return type : Boolean
Exceptions : none
Caller : general
Status : At Risk
: under development
=cut
sub file_exists {
my $self = shift;
my $filename = shift;
......@@ -111,6 +176,23 @@ sub file_exists {
}
=head2 fetch_value_from_db
Arg[1] : DBI::db $dbh - a DBI database handle
Arg[2] : String $sql - SQL statement to execute
Example : my $num_genes = $object->fetch_value_from_db($dbh,
'SELECT count(*) FROM gene');
Description : Executes an SQL statement on a db handle and returns the first
column of the first row returned. Useful for queries returning a
single value, like table counts.
Return type : Return type of SQL statement
Exceptions : thrown on wrong or missing arguments
Caller : general
Status : At Risk
: under development
=cut
sub fetch_value_from_db {
my $self = shift;
my $dbh = shift;
......@@ -127,6 +209,25 @@ sub fetch_value_from_db {
}
=head2 dump_table_to_file
Arg[1] : String $dbtype - db type (source|target)
Arg[2] : String $table - name of table to dump
Arg[3] : String $filename - name of dump file
Arg[4] : Boolean $check_existing - turn on test for existing dump
Example : my $rows_dumped = $object->dump_table_to_file('source',
'stable_id_event', 'stable_id_event_existing.txt');
Description : Dumps the contents of a db table to a tab-delimited file. The
dump file will be written to a subdirectory called 'tables'
under the dumppath from your configuration.
Return type : Int - the number of rows dumped
Exceptions : thrown on wrong or missing arguments
Caller : general
Status : At Risk
: under development
=cut
sub dump_table_to_file {
my $self = shift;
my $dbtype = shift;
......@@ -174,6 +275,25 @@ sub dump_table_to_file {
}
=head2 upload_file_into_table
Arg[1] : String $dbtype - db type (source|target)
Arg[2] : String $table - name of table to upload the data to
Arg[3] : String $filename - name of dump file
Example : my $rows_uploaded = $object->upload_file_into_table('target',
'stable_id_event', 'stable_id_event_new.txt');
Description : Uploads a tab-delimited data file into a db table. The data file
will be taken from a subdirectory 'tables' under your configured
dumppath. If the db table isn't empty, no data is uploaded (and
a warning is issued).
Return type : Int - the number of rows uploaded
Exceptions : thrown on wrong or missing arguments
Caller : general
Status : At Risk
: under development
=cut
sub upload_file_into_table {
my $self = shift;
my $dbtype = shift;
......@@ -230,6 +350,19 @@ sub upload_file_into_table {
}
=head2 logger
Arg[1] : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set
Example : $object->logger->info("Starting ID mapping.\n");
Description : Getter/setter for logger object
Return type : Bio::EnsEMBL::Utils::Logger
Exceptions : none
Caller : constructor
Status : At Risk
: under development
=cut
sub logger {
my $self = shift;
$self->{'_logger'} = shift if (@_);
......@@ -237,6 +370,20 @@ sub logger {
}
=head2 conf
Arg[1] : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration
to set
Example : my $dumppath = $object->conf->param('dumppath');
Description : Getter/setter for configuration object
Return type : Bio::EnsEMBL::Utils::ConfParser
Exceptions : none
Caller : constructor
Status : At Risk
: under development
=cut
sub conf {
my $self = shift;
$self->{'_conf'} = shift if (@_);
......@@ -244,6 +391,19 @@ sub conf {
}
=head2 cache
Arg[1] : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set
Example : $object->cache->read_from_file('genes_by_id', 'source');
Description : Getter/setter for cache object
Return type : Bio::EnsEMBL::IdMapping::Cache
Exceptions : none
Caller : constructor
Status : At Risk
: under development
=cut
sub cache {
my $self = shift;
$self->{'_cache'} = shift if (@_);
......
......@@ -2,6 +2,8 @@ package Bio::EnsEMBL::IdMapping::Cache;
=head1 NAME
Bio::EnsEMBL::IdMapping::Cache - a cache to hold data objects used by the
IdMapping application
=head1 SYNOPSIS
......@@ -58,12 +60,18 @@ my @cache_names = qw(
=head2 new
Arg[1] :
Example :
Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
Example : my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
-LOGGER => $logger,
-CONF => $conf,
);
Description : constructor
Return type : Bio::EnsEMBL::IdMapping::Cache object
Exceptions :
Exceptions : thrown on wrong or missing arguments
Caller : general
Status : At Risk
: under development
=cut
......@@ -92,6 +100,25 @@ sub new {
}
=head2 build_cache
Arg[1] : String $dbtype - db type (source|target)
Arg[2] : String $slice_name - the name of a slice (format as returned by
Bio::EnsEMBL::Slice->name)
Example : my ($num_genes, $filesize) = $cache->build_cache('source',
'chromosome:NCBI36:X:1:1000000:-1');
Description : Builds a cache of genes, transcripts, translations and exons
needed by the IdMapping application and serialises the resulting
cache object to a file.
Return type : list of the number of genes processed and the size of the
serialised cache file
Exceptions : thrown on invalid slice name
Caller : general
Status : At Risk
: under development
=cut
sub build_cache {
my $self = shift;
my $dbtype = shift;
......@@ -99,7 +126,12 @@ sub build_cache {
my $dba = $self->get_DBAdaptor($dbtype);
my $sa = $dba->get_SliceAdaptor;
my $slice = $sa->fetch_by_name($slice_name);
unless ($slice) {
throw("Could not retrieve slice $slice_name.");
}
my $genes = $slice->get_all_Genes(undef, undef, 1);
# biotype filter
......@@ -131,6 +163,20 @@ sub build_cache {
}
=head2 filter_biotypes
Arg[1] : Listref of Bio::EnsEMBL::Gene objects $genes - the genes to filter
Example : my @filtered = @{ $cache->filter_biotypes(\@genes) };
Description : Filters a list of genes by biotype. Biotypes are taken from the
IdMapping configuration parameter 'biotypes'.
Return type : Listref of Bio::EnsEMBL::Gene objects (or empty list)
Exceptions : none
Caller : internal
Status : At Risk
: under development
=cut
sub filter_biotypes {
my $self = shift;
my $genes = shift;
......@@ -145,6 +191,31 @@ sub filter_biotypes {
}
=head2 build_cache_from_genes
Arg[1] : String $type - cache type
Arg[2] : Listref of Bio::EnsEMBL::Gene objects $genes - genes to build cache
from
Arg[3] : Boolean $need_project - indicate if we need to project exons to
common coordinate system
Example : $cache->build_cache_from_genes(
'source.chromosome:NCBI36:X:1:100000:1', \@genes);
Description : Builds the cache by fetching transcripts, translations and exons
for a list of genes from the database, and creating lightweight
Bio::EnsEMBL::IdMapping::TinyFeature objects containing only the
data needed by the IdMapping application. These objects are
attached to a name cache in this cache object. Exons only need
to be projected to a common coordinate system if their native
coordinate system isn't common to source and target assembly
itself.
Return type : none
Exceptions : thrown on wrong or missing arguments
Caller : internal
Status : At Risk
: under development
=cut
sub build_cache_from_genes {
my $self = shift;
my $type = shift;
......@@ -185,7 +256,6 @@ sub build_cache_from_genes {
# build gene caches
$self->add('genes_by_id', $type, $gene->dbID, $lgene);
#$self->add('genes_by_stable_id', $type, $gene->stable_id, $lgene);
# transcripts
foreach my $tr (@{ $gene->get_all_Transcripts }) {
......@@ -206,7 +276,6 @@ sub build_cache_from_genes {
# build transcript caches
$self->add('transcripts_by_id', $type, $tr->dbID, $ltr);
#$self->add('transcripts_by_stable_id', $type, $tr->stable_id, $ltr);
$self->add('genes_by_transcript_id', $type, $tr->dbID, $lgene);
# translation (if there is one)
......@@ -225,8 +294,6 @@ sub build_cache_from_genes {
$ltr->add_Translation($ltl);
$self->add('translations_by_id', $type, $tl->dbID, $ltl);
#$self->add('translations_by_stable_id', $type, $tl->stable_id, $ltl);
#$self->add('translations_by_transcript_id', $type, $tr->dbID, $ltl);
undef $tl;
}
......@@ -267,7 +334,6 @@ sub build_cache_from_genes {
$ltr->add_Exon($lexon);
$self->add('exons_by_id', $type, $exon->dbID, $lexon);
#$self->add('genes_by_exon_id', $type, $exon->dbID, $lgene);
$self->add_list('transcripts_by_exon_id', $type, $exon->dbID, $ltr);
undef $exon;
......@@ -282,6 +348,23 @@ sub build_cache_from_genes {
}
=head2 add
Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
Arg[2] : String $type - a cache type (e.g. "source.$slice_name")
Arg[3] : String $key - key of this entry (e.g. a gene dbID)
Arg[4] : Bio::EnsEMBL::IdMapping::TinyFeature $val - value to cache
Example : $cache->add('genes_by_id',
'source.chromosome:NCBI36:X:1:1000000:1', '1234', $tiny_gene);
Description : Adds a TinyFeature object to a named cache.
Return type : Bio::EnsEMBL::IdMapping::TinyFeature
Exceptions : thrown on wrong or missing arguments
Caller : internal
Status : At Risk
: under development
=cut
sub add {
my $self = shift;
my $name = shift;
......@@ -298,6 +381,25 @@ sub add {
return $self->{'cache'}->{$name}->{$type}->{$key};
}
=head2 add_list
Arg[1] : String $name - a cache name (e.g. 'genes_by_id')
Arg[2] : String $type - a cache type (e.g. "source.$slice_name")
Arg[3] : String $key - key of this entry (e.g. a gene dbID)
Arg[4] : List of Bio::EnsEMBL::IdMapping::TinyFeature @val - values
to cache
Example : $cache->add_list('transcripts_by_exon_id',
'source.chromosome:NCBI36:X:1:1000000:1', '1234',
$tiny_transcript1, $tiny_transcript2);
Description : Adds a list of TinyFeature objects to a named cache.
Return type : Listref of Bio::EnsEMBL::IdMapping::TinyFeature objects
Exceptions : thrown on wrong or missing arguments
Caller : internal
Status : At Risk
: under development
=cut
sub add_list {
my $self = shift;
my $name = shift;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment