From ec9c73945f5028ea6c07744fe60ec73675805f4f Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Wed, 6 Oct 2004 11:42:44 +0000
Subject: [PATCH]  gen tidy

---
 .../xref_mapping/XrefMapper/BasicMapper.pm    | 173 ++++++++++++------
 1 file changed, 119 insertions(+), 54 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
index 8e2950e762..5e105cc571 100644
--- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
@@ -1,23 +1,47 @@
 package XrefMapper::BasicMapper;
- 
+
 use strict;
 use DBI;
 use Bio::EnsEMBL::DBSQL::DBAdaptor;
 use Bio::EnsEMBL::Translation;
 
+=head1 NAME
+
+XrefMapper::BasicMapper
+
+=head1 DESCRIPTION
+
+This is the basic mapper routine. It will create the necessary fasta files for
+both the xref and ensembl sequences. These will then be matched using exonerate
+and the results written to another file. The xref database is hard coded at the
+top of this module. By creating a <species>.pm file and inheriting from this
+base class, different matching routines, parameters, data sets, etc. can be set.
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+
+=cut
+
+
+#
+# Specify xref database here
+#
 my $xref_host = "ecs1g";
 my $xref_port = 3306;
 my $xref_database = "ianl_test_xref";
 my $xref_user = "ensadmin";
 my $xref_password = "ensembl";
- 
- 
+
+
+
 sub new {
   my($class, $species, $host, $port, $dbname, $user, $password ,$dir) = @_;
- 
+
   my $self ={};
   bless $self,$class;
- 
+
   $self->species($species);
   $self->host($host);
   $self->port($port);
@@ -32,18 +56,26 @@ sub new {
 
 
 
-
 sub dump_seqs{
   my ($self, $slice) = @_;
   $self->dump_ensembl($slice);
   $self->dump_xref();
 }
 
+sub run_matching{
+  print "NOT done yet:-)\n";
+}
+
+sub store{
+  print "NOT done yet Either :-)\n";
+}
 
 sub dump_xref{
   my ($self) = @_;
 
-
+  #
+  # the species specified must be in the database and hence have a species_id
+  #
   my $sql = "select species_id from species where name = '".$self->species."'";
   my $dbi = $self->dbi();
   my $sth = $dbi->prepare($sql);
@@ -63,9 +95,13 @@ sub dump_xref{
     }
     die("Please try again\n");
   }
-                                                                                                                            
 
-  open(XDNA,">".$self->dir."/xref_dna.fasta") || die "Could not open xref_dna.fasta";
+  #
+  # Dump out the sequences where the species has an id of species_id
+  # and the sequence type is 'dna'
+  #
+  $self->xref_dna_file($self->dir."/".$self->species."_xref_dna.fasta");
+  open(XDNA,">".$self->dir."/".$self->species."_xref_dna.fasta") || die "Could not open xref_dna.fasta";
   my $sql = "select p.xref_id, p.sequence from primary_xref p, xref x ";
   $sql   .= "where p.xref_id = x.xref_id and ";
   $sql   .= "      p.sequence_type ='dna' and ";
@@ -84,7 +120,12 @@ sub dump_xref{
 ENDDNA:
   close XDNA;
 
-  open(XPEP,">".$self->dir."/xref_prot.fasta") || die "Could not open xref_prot.fasta";
+  #
+  # Dump out the sequences where the species has an id of species_id
+  # and the sequence type is 'peptide'
+  #
+  $self->xref_protein_file($self->dir."/".$self->species."_xref_prot.fasta");
+  open(XPEP,">".$self->dir."/".$self->species."_xref_prot.fasta") || die "Could not open xref_prot.fasta";
   $sql    = "select p.xref_id, p.sequence from primary_xref p, xref x ";
   $sql   .= "where p.xref_id = x.xref_id and ";
   $sql   .= "      p.sequence_type ='peptide' and ";
@@ -109,19 +150,14 @@ ENDXPEP:
 sub dump_ensembl{
   my ($self) = @_;
 
-  #create filename
-  $self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta");
-  $self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta");
-
-
   $self->fetch_and_dump_seq();
-  
+
 }
 
 
 sub fetch_and_dump_seq{
-  my ($self, $type, $adaptortype) = @_;
- 
+  my ($self) = @_;
+
   my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-species => $self->species(),
                            -dbname  => $self->dbname(),
                            -host    => $self->host(),
@@ -129,10 +165,18 @@ sub fetch_and_dump_seq{
                            -password => $self->password(),
                            -user     => $self->user(),
                            -group    => 'core');
- 
+
+  #
+  # store ensembl dna file name and open it
+  #
+  $self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta");
   open(DNA,">".$self->ensembl_dna_file()) 
     || die("Could not open dna file for writing: ".$self->ensembl_dna_file."\n");
 
+  #
+  # store ensembl protein file name and open it
+  #
+  $self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta");
   open(PEP,">".$self->ensembl_protein_file()) 
     || die("Could not open dna file for writing: ".$self->ensembl_protein_file."\n");
 
@@ -162,12 +206,45 @@ FIN:
   close DNA;
   close PEP;
 }
-                                      
 
 
+
+sub dbi {
+
+  my $self = shift;
+
+  my $dbi = DBI->connect("dbi:mysql:host=$xref_host;port=$xref_port;database=$xref_database",
+                        "$xref_user",
+                        "$xref_password",
+ 			 {'RaiseError' => 1}) || die "Can't connect to database";
+
+
+  return $dbi;
+}
+
+###
+# Getter/Setter methods
+###
+
+sub xref_protein_file{
+  my ($self, $arg) = @_;
+
+  (defined $arg) &&
+    ($self->{_xref_prot_file} = $arg );
+  return $self->{_xref_prot_file};
+}
+
+sub xref_dna_file{
+  my ($self, $arg) = @_;
+
+  (defined $arg) &&
+    ($self->{_xref_dna_file} = $arg );
+  return $self->{_xref_dna_file};
+}
+
 sub ensembl_protein_file{
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_ens_prot_file} = $arg );
   return $self->{_ens_prot_file};
@@ -175,42 +252,42 @@ sub ensembl_protein_file{
 
 sub ensembl_dna_file{
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_ens_dna_file} = $arg );
   return $self->{_ens_dna_file};
 }
 
 
-sub get_ensembl_type{
-  my %type;
-
-  $type{'Translation'} = "peptide";
-  $type{'Transcript'} = "dna";
-
-  return \%type;
-}
+#sub get_ensembl_type{
+#  my %type;
+#
+#  $type{'Translation'} = "peptide";
+#  $type{'Transcript'} = "dna";
+#
+#  return \%type;
+#}
 
 sub species {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_species} = $arg );
   return $self->{_species};
 }
- 
+
 sub host {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_host} = $arg );
   return $self->{_host};
 }
- 
+
 
 sub port {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_port} = $arg );
   return $self->{_port};
@@ -218,23 +295,23 @@ sub port {
 
 sub dbname {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_dbname} = $arg );
   return $self->{_dbname};
 }
- 
+
 sub user {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_user} = $arg );
   return $self->{_user};
 }
- 
+
 sub password {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_password} = $arg );
   return $self->{_password};
@@ -242,25 +319,13 @@ sub password {
 
 sub dir {
   my ($self, $arg) = @_;
- 
+
   (defined $arg) &&
     ($self->{_dir} = $arg );
   return $self->{_dir};
 }
 
 
-sub dbi {
- 
-  my $self = shift;
-  
-  my $dbi = DBI->connect("dbi:mysql:host=$xref_host;port=$xref_port;database=$xref_database",
-                        "$xref_user",
-                        "$xref_password",
- 			 {'RaiseError' => 1}) || die "Can't connect to database";
-  
- 
-  return $dbi;
-}
- 
+
  
 1;
-- 
GitLab