Commit e23bf1fe authored by Arne Stabenau's avatar Arne Stabenau
Browse files

Finished, compiles, but is not tested yet.

parent 08ac1349
......@@ -16,7 +16,6 @@ SequenceAdaptor - produce sequence strings from locations
=head1 SYNOPSIS
=head1 DESCRIPTION
......@@ -38,47 +37,77 @@ use vars qw(@ISA);
use strict;
# Object preamble - inheriets from Bio::Root::Object
use Bio::EnsEMBL::DBSQL::Adaptor;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
=head2 fetch_by_contig_id_start_end
=head2 fetch_by_contig_id_start_end_strand
Arg 1 : int rawContigdbID
Arg 2 : int startBasePair
Arg 3 : int endBasePair
a -1 means until the end
Arg 4 : int strand
-1, 1 are possible values
Function : retrieves the dna string from the database from the
given RawContig internal id.
Returntype: txt
Exceptions: endBasePair should be less or equal than length of contig
returns undef if query fails
Caller : Bio::EnsEMBL::RawContig::seq(), RawContig::subseq()
=cut
sub fetch_by_contig_id_start_end {
my ( $self, $contig_id, $start, $end ) = @_;
sub fetch_by_contig_id_start_end_strand {
my ( $self, $contig_id, $start, $end, $strand ) = @_;
my $sth;
if( $start < 1 ) {
$self->throw( "Wrong parameters" );
}
if( $end == -1 ) {
$sth = $self->prepare( "SELECT SUBSTRING( sequence, $start )
$sth = $self->prepare( "SELECT c.length, SUBSTRING( d.sequence, $start )
FROM dna d, contig c
WHERE d.dna_id = c.dna_id
AND c.contig_id = $contig_id" );
} else {
my $length = $end - $start + 1;
if( $length < 1 ) {
$self->throw( "Wrong parameters" );
}
$sth = $self->prepare( "SELECT c.length, SUBSTRING( d.sequence, $start, $length )
FROM dna d, contig c
WHERE d.dna_id = c.dna_id
AND c.contig_id = $contig_id" );
}
$sth->execute();
my($subseq) = $sth->fetchrow
or $self->throw("Could not fetch substr of contig " .$contig_id );
$sth->execute();
if( my $aref = $sth->fetchrow_arrayref() ) {
my ( $length, $seq ) = @$aref;
if( $strand == -1 ) {
return $self->_reverse_comp( $seq );
} else {
return $seq;
}
} else {
return undef;
}
}
=head2 fetch_by_Slice_start_end
=head2 fetch_by_Slice_start_end_strand
Arg 1 : Bio::EnsEMBL::Slice slice
The slice from which you want the sequence
......@@ -86,6 +115,8 @@ sub fetch_by_contig_id_start_end {
count from 1
Arg 3 : int endBasePair
count from 1, -1 is last one
Arg 4 : int strand
1, -1
Function : retrieves from db the sequence for this slice
uses AssemblyMapper to find the assembly
Returntype: txt
......@@ -95,199 +126,101 @@ sub fetch_by_contig_id_start_end {
=cut
sub fetch_by_Slice_start_end {
my ( $self, $slice, $start, $end ) = @_;
sub fetch_by_Slice_start_end_strand {
my ( $self, $slice, $start, $end, $strand ) = @_;
}
=head2 fetch_by_assembly_location
Arg [1] : none, txt, int, Bio::EnsEMBL::Example formal_parameter_name
Additional description lines
list, listref, hashref
Function : testable description
Returntype: none, txt, int, float, Bio::EnsEMBL::Example
Exceptions: none
Caller : object::methodname or just methodname
Example : ( optional )
=cut
sub fetch_by_assembly_location {
my $self = shift;
}
=head2 subseq
Title : subseq
Usage : $substring = $obj->subseq(10,40);
Function: returns the subseq from start to end, where the first base
is 1 and the number is inclusive, ie 1-2 are the first two
bases of the sequence
Start cannot be larger than end but can be equal
Returns : a string
Args : start and end scalars
=cut
sub subseq{
my ($self,$start,$end) = @_;
if( $start > $end ){
$self->throw("in subseq, start [$start] cannot be greater than end [$end]");
}
my $add = 0;
if( $end > $self->length ) {
print STDERR ("TROUBLE - $end greater than length ".$self->length);
$add = $end-$self->length;
$end = $self->length;
if( !$slice ){
$self->throw("need a slice to work\n");
}
#if( $start <= 0 || $end > $self->length ) {
# $self->throw("You have to have start positive and length less than the total length of sequence - calling $start:$end vs".$self->length);
#}
unless
($slice->isa("Bio::EnsEMBL::Slice")) {
$self->throw("$slice isn't a slice");
}
my $id=$self->dna_id;
my $length= $end-$start+1;
$self->fetch_by_assembly_location
(
$slice->chr_start()+$start-1,
$slice->chr_start()+$end-1,
$strand,
$slice->chr_name(),
$slice->assembly_type()
);
my $sth=$self->db_handle->prepare("SELECT SUBSTRING(sequence,$start,$length) FROM dna WHERE dna_id = $id");
$sth->execute();
my($subseq) = $sth->fetchrow
or $self->throw("Could not fetch substr of dna " .$id);
$subseq .= 'N' x $add;
return $subseq;
}
=head2 moltype
Title : moltype
Usage : if( $obj->moltype eq 'dna' ) { /Do Something/ }
Function: Returns the type of sequence
Returns : dna
Args : none
=head2 fetch_by_assembly_location
=cut
sub moltype{
return "dna";
}
=head2 id
Title : id
Usage : $id = $seq->id()
Function: maps to display id
Returns : display id
Args : none
=cut
sub id {
my ($self)= @_;
return $self->display_id();
}
=head2 length
Title : length
Usage : $len = $seq->length()
Function: Returns the length of the sequence
Returns : scalar
Args : none
Arg 1 : int $chrStart
Arg 2 : int $chrEnd
Arg 3 : int $strand
Arg 3 : txt $chrName
Arg 4 : txt $assemblyType
Function : retrieve speciefied sequence from db. Using AssemblyMapper.
Returntype: txt
Exceptions: Wrong parameters give undef as result
Caller : general, fetch_by_Slice_start_end_strand
=cut
sub length {
my ($self)= @_;
if( defined $self->_length() ) {
return $self->_length();
}
sub fetch_by_assembly_location {
my ( $self, $chrStart, $chrEnd,
$strand, $chrName, $assemblyType ) = @_;
my $id=$self->dna_id;
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($assemblyType);
$mapper->register_region($chrName,$chrStart,$chrEnd);
my $sth=$self->db_handle->prepare("SELECT length(sequence) FROM dna WHERE dna_id = $id");
$sth->execute();
my @coord_list = $mapper->map_coordinates_to_rawcontig
( $chrStart, $chrEnd, $chrName, $strand );
my($length) = $sth->fetchrow
or $self->throw("Could not determine length of dna " .$id);
$self->_length($length);
return $length;
}
=head2 _length
Title : _length
Usage : $obj->_length($newval)
Function:
Returns : value of _length
Args : newvalue (optional)
=cut
sub _length{
my $obj = shift;
if( @_ ) {
my $value = shift;
$obj->{'_length'} = $value;
}
return $obj->{'_length'};
# for each of the pieces get sequence
my $seq = "";
for my $segment ( @coord_list ) {
if( $segment->isa( "Bio::EnsEMBL::Mapper::Coordinate" )) {
my $contig_seq = $self->fetch_by_contig_id_start_end_strand
( $segment->id(),
$segment->start(),
$segment->end(),
$segment->strand() );
$seq .= $contig_seq;
} else {
# its a gap
my $length = $segment->end() - $segment->start() + 1;
$seq .= "N" x $length;
}
}
return $seq;
}
=head2 can_call_new
Title : can_call_new
Usage : if( $obj->can_call_new ) {
$newobj = $obj->new( %param );
}
Function: indicates that this object can call the ->new method
Example :
Returns : 1 or 0
Args :
=head2 _reverse_comp
Arg 1 : txt $dna_sequence
Function : build reverse complement string
Returntype: txt
Exceptions: none
Caller : private to this module
=cut
sub can_call_new{
return 0;
sub _reverse_comp {
my $self = shift;
my $seq = shift;
$_ = reverse( $seq );
tr/CGTAcgta/GCATgcat/;
return $_;
}
=head2 desc
Title : desc
Usage : $obj->desc($newval)
Function:
Example :
Returns : value of desc
Args : newvalue (optional)
=cut
sub desc {
my ($self,$value) = @_;
if( defined $value && $value ne '' ) {
$self->{'desc'} = $value;
}
return $self->{'desc'} || '';
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment