Newer
Older
use IPC::Open3;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
=head1 NAME
XrefMapper::BasicMapper
=head1 DESCRIPTION
This is the basic mapper routine. It will create the necessary fasta files for
both the xref and ensembl sequences. These will then be matched using exonerate
and the results written to another file. By creating a <species>.pm file and
inheriting from this base class, different matching routines, parameters, data
sets etc. can be set.
=head1 CONTACT
Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
=cut
# Hashes to hold method-specific thresholds
my %method_query_threshold;
my %method_target_threshold;

# Various useful variables.
# All of these are file-scoped lexical hashes that start out empty; none of
# the subs visible in this part of the file read or write them, so they are
# presumably filled in by code further down -- confirm before renaming.
my %translation_to_transcript;
my %transcript_to_translation;
my %genes_to_transcripts;
my %xref_to_source;
my %object_xref_mappings;
my %object_xref_identities;
my %xref_descriptions;
my %xref_accessions;
my %xref_labels;
my %source_to_external_db;
my %xrefs_written;
my %object_xrefs_written;
my %failed_xref_mappings;
my %updated_source;
=head2 new
Description: Constructor for BasicMapper.
Returntype : BasicMapper
Exceptions : none
Caller : general
=cut
# Constructor: returns an empty hash blessed into the invoking class.
# Anything passed after the class name is accepted but not used.
sub new {
    my ($class) = @_;

    return bless {}, $class;
}
=head2 dump_seqs
Arg[1]: xref object which holds info needed for the dump of xref
Description: Dumps out the files for the mapping. Xref object should hold
the value of the databases and source to be used.
Returntype : none
Exceptions : will die if species not known or an error occurs while
: trying to write to files.
Caller : general
=cut
# Dump the sequence files used for mapping: first the xref side through
# dump_xref(), then the Ensembl side through dump_ensembl(), which is handed
# the caller's $location unchanged.
sub dump_seqs {
    my ($self, $location) = @_;

    $self->dump_xref();
    $self->dump_ensembl($location);
}
=head2 build_list_and_map
Arg[1]: xref object which holds info on method and files.
Description: runs the mapping of the list of files with species methods
Returntype : none
Exceptions : none
Caller : general
=cut
# Build the work list for run_mapping() and hand it over.
#
# For every entry returned by $self->method() two triples are queued, in this
# order: a DNA job and then a peptide job.  Each triple is
#   [ method, xref fasta file, core fasta file ]
# where the xref file name carries the zero-based position of the method
# ("xref_<i>_dna.fasta" / "xref_<i>_peptide.fasta") under $self->xref->dir.
# run_mapping() receives a reference to the complete list, which is empty
# when there are no methods.
sub build_list_and_map {
    my ($self) = @_;

    my @jobs;
    my $index = 0;

    foreach my $method (@{ $self->method() }) {
        push @jobs, [
            $method,
            $self->xref->dir . "/xref_" . $index . "_dna.fasta",
            $self->core->dna_file(),
        ];
        push @jobs, [
            $method,
            $self->xref->dir . "/xref_" . $index . "_peptide.fasta",
            $self->core->protein_file(),
        ];
        $index++;
    }

    $self->run_mapping(\@jobs);
}
=head2 get_species_id_from_species_name
Arg[1]: species name
Description: get the species_id from the database for the named database.
Example : my $id = get_species_id_from_species_name('homo_sapiens');
Returntype : int (species_id)
Exceptions : will die if species does not exist in given xref database.
Caller : general
=cut
# Look up the species_id for a species name in the "species" table reached
# through $self->dbc.
#
# Arg[1]  : species name
# Returns : the first column of the first matching row (the species_id)
# Dies    : with "Please try again :-)\n" when no row matches, after printing
#           the requested name and every name in the species table to STDERR.
sub get_species_id_from_species_name {
    my ($self, $species) = @_;

    # The name is bound as a placeholder instead of being spliced into the
    # statement, so a quote inside the name can no longer break the SQL.
    my $id_sth = $self->dbc->prepare(
        "select species_id from species where name = ?");
    $id_sth->execute($species);

    my @id_row = $id_sth->fetchrow_array();
    return $id_row[0] if @id_row;

    # Unknown species: tell the user which names would have been accepted.
    print STDERR "Couldn't get ID for species ".$species."\n";
    print STDERR "It must be one of :-\n";

    my $name_sth = $self->dbc->prepare("select name from species");
    $name_sth->execute();
    while (my @name_row = $name_sth->fetchrow_array()) {
        print STDERR $name_row[0]."\n";
    }

    die("Please try again :-)\n");
}
=head2 get_set_lists
Description: specifies the list of databases and source to be used in the
: generation of one or more data sets.
Returntype : list of lists
Example : my @lists =@{$self->get_set_lists()};
Exceptions : none
Caller : dump_xref
=cut
# Default data-set definition: a single set pairing the method name
# "ExonerateGappedBest1" with a two-element list of "*" entries.
# Returns a reference to a list of [ method, [ entry, entry ] ] pairs,
# built afresh on every call.
sub get_set_lists {
    my ($self) = @_;

    my @wildcards   = ("*", "*");
    my @default_set = ("ExonerateGappedBest1", \@wildcards);

    return [ \@default_set ];
}
=head2 get_source_id_from_source_name
Arg[1]: source name
Description: get the source_id from the database for the named source.
Example : my $id = get_source_id_from_source_name('RefSeq');
Returntype : int (source_id)
Exceptions : will die if source does not exist in given xref database.
Loading full blame...