Skip to content
Snippets Groups Projects
BasicMapper.pm 5.96 KiB
Newer Older
Ian Longden's avatar
Ian Longden committed
package XrefMapper::BasicMapper;
Ian Longden's avatar
Ian Longden committed

use strict;
use DBI;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
Ian Longden's avatar
Ian Longden committed
use Bio::EnsEMBL::Translation;
use  XrefMapper::db;

use vars '@ISA';

@ISA = qw{ XrefMapper::db };

Ian Longden's avatar
Ian Longden committed

Ian Longden's avatar
Ian Longden committed
=head1 NAME

XrefMapper::BasicMapper

=head1 DESCIPTION

This is the basic mapper routine. It will create the necessary fasta files for
both the xref and ensembl sequences. These will then be matched using exonerate
and the results written to another file. By creating a <species>.pm file and 
inheriting from this base class different matching routines, parameters, data 
sets etc can be set.
Ian Longden's avatar
Ian Longden committed

=head1 CONTACT

Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk


=cut


  my ($self, $xref) = @_;
  $self->dump_ensembl();
  $self->dump_xref($xref);
Ian Longden's avatar
Ian Longden committed
sub run_matching{
  print "NOT done yet:-)\n";
}

sub store{
  print "NOT done yet Either :-)\n";
}
  my ($self,$xref) = @_;

  if(!defined($xref->species())){
    $xref->species($self->species);
  }
  if(!defined($xref->dir())){
    if(defined($self->species)){
      $xref->species($self->species)
    }
    else{
      $xref->species(".");
    }
  }
Ian Longden's avatar
Ian Longden committed
  #
  # the species specified must be in the database and hence have a species_id
  #
  my $sql = "select species_id from species where name = '".$xref->species."'";
  my $dbi = $xref->dbi();
Ian Longden's avatar
Ian Longden committed
  my $sth = $dbi->prepare($sql);
  $sth->execute();
  my @row = $sth->fetchrow_array();
  my $species_id;
  if (defined @row) {
    $species_id = $row[0];
  } else {
    print STDERR "Couldn't get ID for species ".$xref->species."\n";
    print STDERR "It must be one of :-\n";
    $sql = "select name from species";
    $sth = dbi()->prepare($sql);
    $sth->execute();
    while(my @row = $sth->fetchrow_array()){
      print STDERR $row[0]."\n";
    }
    die("Please try again\n");
  }

Ian Longden's avatar
Ian Longden committed
  #
  # Dump out the sequences where the species has an id of species_id
  # and the sequence type is 'dna'
  #
  $self->xref_dna_file($xref->dir."/".$xref->species."_xref_dna.fasta");
  open(XDNA,">".$xref->dir."/".$xref->species."_xref_dna.fasta") || die "Could not open xref_dna.fasta";
  my $sql = "select p.xref_id, p.sequence from primary_xref p, xref x ";
  $sql   .= "where p.xref_id = x.xref_id and ";
  $sql   .= "      p.sequence_type ='dna' and ";
  $sql   .= "      x.species_id = ".$species_id." ";
  $sth = $xref->dbi()->prepare($sql);
Ian Longden's avatar
Ian Longden committed
  my $i = 0;
  while(my @row = $sth->fetchrow_array()){
Ian Longden's avatar
Ian Longden committed
    $i++;
    $row[1] =~ s/(.{60})/$1\n/g;
    print XDNA ">".$row[0]."\n".$row[1]."\n";
Ian Longden's avatar
Ian Longden committed
    if($i > 10){
      goto ENDDNA;
    }
Ian Longden's avatar
Ian Longden committed
ENDDNA:
Ian Longden's avatar
Ian Longden committed
  #
  # Dump out the sequences where the species has an id of species_id
  # and the sequence type is 'peptide'
  #
  $self->xref_protein_file($xref->dir."/".$self->species."_xref_prot.fasta");
  open(XPEP,">".$xref->dir."/".$self->species."_xref_prot.fasta") || die "Could not open xref_prot.fasta";
  $sql    = "select p.xref_id, p.sequence from primary_xref p, xref x ";
  $sql   .= "where p.xref_id = x.xref_id and ";
  $sql   .= "      p.sequence_type ='peptide' and ";
  $sql   .= "      x.species_id = ".$species_id." ";
  $sth = $xref->dbi()->prepare($sql);
Ian Longden's avatar
Ian Longden committed
  $i = 0;
  while(my @row = $sth->fetchrow_array()){
Ian Longden's avatar
Ian Longden committed
    $i++;
    $row[1] =~ s/(.{60})/$1\n/g;
    print XPEP ">".$row[0]."\n".$row[1]."\n";
Ian Longden's avatar
Ian Longden committed
    if($i > 10){
      goto ENDXPEP;
    }
Ian Longden's avatar
Ian Longden committed
ENDXPEP:
  close XPEP;

}


sub dump_ensembl{
  my ($self) = @_;

  $self->fetch_and_dump_seq();
Ian Longden's avatar
Ian Longden committed

Ian Longden's avatar
Ian Longden committed
  my ($self) = @_;

  my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-species => $self->species(),
                           -dbname  => $self->dbname(),
                           -host    => $self->host(),
                           -port    => $self->port(),
                           -password => $self->password(),
Ian Longden's avatar
Ian Longden committed
                           -user     => $self->user(),
Ian Longden's avatar
Ian Longden committed

  #
  # store ensembl dna file name and open it
  #
  
  # if no directory set then dump in the current directory.
  if(!defined($self->dir())){
    $self->dir(".");
  }
Ian Longden's avatar
Ian Longden committed
  $self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta");
Ian Longden's avatar
Ian Longden committed
  open(DNA,">".$self->ensembl_dna_file()) 
    || die("Could not open dna file for writing: ".$self->ensembl_dna_file."\n");

Ian Longden's avatar
Ian Longden committed
  #
  # store ensembl protein file name and open it
  #
  $self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta");
Ian Longden's avatar
Ian Longden committed
  open(PEP,">".$self->ensembl_protein_file()) 
    || die("Could not open dna file for writing: ".$self->ensembl_protein_file."\n");

  my $gene_adap = $db->get_GeneAdaptor();
  my @gene_ids = @{$gene_adap->list_dbIDs()};
  my $i =0;
  foreach my $gene_id (@gene_ids){
    $i++;
    my $gene = $gene_adap->fetch_by_dbID($gene_id);
    print "gene ".$gene."\n";
    foreach my $transcript (@{$gene->get_all_Transcripts()}) {
      my $seq = $transcript->spliced_seq(); 
      $seq =~ s/(.{60})/$1\n/g;
Ian Longden's avatar
Ian Longden committed
      print DNA ">" . $transcript->dbID() . "\n" .$seq."\n";
      my $trans = $transcript->translation();
      my $translation = $transcript->translate();
      print "tranlation ".$translation."\n";
      my $pep_seq = $translation->seq();
      $pep_seq =~ s/(.{60})/$1\n/g;
      print PEP ">".$trans->dbID()."\n".$pep_seq."\n";
    }
    if($i > 10){
      goto FIN;
Ian Longden's avatar
Ian Longden committed
FIN:
  close DNA;
  close PEP;
Ian Longden's avatar
Ian Longden committed

###
# Getter/Setter methods
###

sub xref_protein_file{
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_xref_prot_file} = $arg );
  return $self->{_xref_prot_file};
}

sub xref_dna_file{
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_xref_dna_file} = $arg );
  return $self->{_xref_dna_file};
}

Ian Longden's avatar
Ian Longden committed
sub ensembl_protein_file{
  my ($self, $arg) = @_;
Ian Longden's avatar
Ian Longden committed

Ian Longden's avatar
Ian Longden committed
  (defined $arg) &&
    ($self->{_ens_prot_file} = $arg );
  return $self->{_ens_prot_file};
}
Ian Longden's avatar
Ian Longden committed
sub ensembl_dna_file{
  my ($self, $arg) = @_;
Ian Longden's avatar
Ian Longden committed

Ian Longden's avatar
Ian Longden committed
  (defined $arg) &&
    ($self->{_ens_dna_file} = $arg );
  return $self->{_ens_dna_file};
}
Ian Longden's avatar
Ian Longden committed
#sub get_ensembl_type{
#  my %type;
#
#  $type{'Translation'} = "peptide";
#  $type{'Transcript'} = "dna";
#
#  return \%type;
#}
Ian Longden's avatar
Ian Longden committed
1;