From ec9c73945f5028ea6c07744fe60ec73675805f4f Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Wed, 6 Oct 2004 11:42:44 +0000 Subject: [PATCH] gen tidy --- .../xref_mapping/XrefMapper/BasicMapper.pm | 173 ++++++++++++------ 1 file changed, 119 insertions(+), 54 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm index 8e2950e762..5e105cc571 100644 --- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm @@ -1,23 +1,47 @@ package XrefMapper::BasicMapper; - + use strict; use DBI; use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Translation; +=head1 NAME + +XrefMapper::BasicMapper + +=head1 DESCRIPTION + +This is the basic mapper routine. It will create the necessary fasta files for +both the xref and ensembl sequences. These will then be matched using exonerate +and the results written to another file. The xref database is hard coded at the +beginning. By creating a <species>.pm file and inheriting from this base class +different matching routines, parameters, data sets etc can be set. 
+ +=head1 CONTACT + +Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk + + +=cut + + +# +# Specify xref database here +# my $xref_host = "ecs1g"; my $xref_port = 3306; my $xref_database = "ianl_test_xref"; my $xref_user = "ensadmin"; my $xref_password = "ensembl"; - - + + + sub new { my($class, $species, $host, $port, $dbname, $user, $password ,$dir) = @_; - + my $self ={}; bless $self,$class; - + $self->species($species); $self->host($host); $self->port($port); @@ -32,18 +56,26 @@ sub new { - sub dump_seqs{ my ($self, $slice) = @_; $self->dump_ensembl($slice); $self->dump_xref(); } +sub run_matching{ + print "NOT done yet:-)\n"; +} + +sub store{ + print "NOT done yet Either :-)\n"; +} sub dump_xref{ my ($self) = @_; - + # + # the species specified must be in the database and hence have a species_id + # my $sql = "select species_id from species where name = '".$self->species."'"; my $dbi = $self->dbi(); my $sth = $dbi->prepare($sql); @@ -63,9 +95,13 @@ sub dump_xref{ } die("Please try again\n"); } - - open(XDNA,">".$self->dir."/xref_dna.fasta") || die "Could not open xref_dna.fasta"; + # + # Dump out the sequences where the species has an id of species_id + # and the sequence type is 'dna' + # + $self->xref_dna_file($self->dir."/".$self->species."_xref_dna.fasta"); + open(XDNA,">".$self->dir."/".$self->species."_xref_dna.fasta") || die "Could not open xref_dna.fasta"; my $sql = "select p.xref_id, p.sequence from primary_xref p, xref x "; $sql .= "where p.xref_id = x.xref_id and "; $sql .= " p.sequence_type ='dna' and "; @@ -84,7 +120,12 @@ sub dump_xref{ ENDDNA: close XDNA; - open(XPEP,">".$self->dir."/xref_prot.fasta") || die "Could not open xref_prot.fasta"; + # + # Dump out the sequences where the species has an id of species_id + # and the sequence type is 'peptide' + # + $self->xref_protein_file($self->dir."/".$self->species."_xref_prot.fasta"); + open(XPEP,">".$self->dir."/".$self->species."_xref_prot.fasta") || die "Could not open 
xref_prot.fasta"; $sql = "select p.xref_id, p.sequence from primary_xref p, xref x "; $sql .= "where p.xref_id = x.xref_id and "; $sql .= " p.sequence_type ='peptide' and "; @@ -109,19 +150,14 @@ ENDXPEP: sub dump_ensembl{ my ($self) = @_; - #create filename - $self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta"); - $self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta"); - - $self->fetch_and_dump_seq(); - + } sub fetch_and_dump_seq{ - my ($self, $type, $adaptortype) = @_; - + my ($self) = @_; + my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-species => $self->species(), -dbname => $self->dbname(), -host => $self->host(), @@ -129,10 +165,18 @@ sub fetch_and_dump_seq{ -password => $self->password(), -user => $self->user(), -group => 'core'); - + + # + # store ensembl dna file name and open it + # + $self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta"); open(DNA,">".$self->ensembl_dna_file()) || die("Could not open dna file for writing: ".$self->ensembl_dna_file."\n"); + # + # store ensembl protein file name and open it + # + $self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta"); open(PEP,">".$self->ensembl_protein_file()) || die("Could not open dna file for writing: ".$self->ensembl_protein_file."\n"); @@ -162,12 +206,45 @@ FIN: close DNA; close PEP; } - + +sub dbi { + + my $self = shift; + + my $dbi = DBI->connect("dbi:mysql:host=$xref_host;port=$xref_port;database=$xref_database", + "$xref_user", + "$xref_password", + {'RaiseError' => 1}) || die "Can't connect to database"; + + + return $dbi; +} + +### +# Getter/Setter methods +### + +sub xref_protein_file{ + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_xref_prot_file} = $arg ); + return $self->{_xref_prot_file}; +} + +sub xref_dna_file{ + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_xref_dna_file} = $arg ); + return $self->{_xref_dna_file}; +} + sub ensembl_protein_file{ my ($self, $arg) = @_; - + (defined $arg) && 
($self->{_ens_prot_file} = $arg ); return $self->{_ens_prot_file}; @@ -175,42 +252,42 @@ sub ensembl_protein_file{ sub ensembl_dna_file{ my ($self, $arg) = @_; - + (defined $arg) && ($self->{_ens_dna_file} = $arg ); return $self->{_ens_dna_file}; } -sub get_ensembl_type{ - my %type; - - $type{'Translation'} = "peptide"; - $type{'Transcript'} = "dna"; - - return \%type; -} +#sub get_ensembl_type{ +# my %type; +# +# $type{'Translation'} = "peptide"; +# $type{'Transcript'} = "dna"; +# +# return \%type; +#} sub species { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_species} = $arg ); return $self->{_species}; } - + sub host { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_host} = $arg ); return $self->{_host}; } - + sub port { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_port} = $arg ); return $self->{_port}; @@ -218,23 +295,23 @@ sub port { sub dbname { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_dbname} = $arg ); return $self->{_dbname}; } - + sub user { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_user} = $arg ); return $self->{_user}; } - + sub password { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_password} = $arg ); return $self->{_password}; @@ -242,25 +319,13 @@ sub password { sub dir { my ($self, $arg) = @_; - + (defined $arg) && ($self->{_dir} = $arg ); return $self->{_dir}; } -sub dbi { - - my $self = shift; - - my $dbi = DBI->connect("dbi:mysql:host=$xref_host;port=$xref_port;database=$xref_database", - "$xref_user", - "$xref_password", - {'RaiseError' => 1}) || die "Can't connect to database"; - - - return $dbi; -} - + 1; -- GitLab