Commit 9440d0ed authored by Graham McVicker's avatar Graham McVicker
Browse files

Add missing fetch_all_by_RawContig_and_score to BaseFeatureAdaptor

Updates so slices can handle negative strand, not finished yet, and not tested
parent 09a68140
......@@ -156,7 +156,7 @@ sub fetch_by_dbID{
}
=head2 fetch_all_by_Contig_constraint
=head2 fetch_all_by_RawContig_constraint
Arg [1] : Bio::EnsEMBL::RawContig $contig
The contig object from which features are to be obtained
......@@ -175,7 +175,7 @@ sub fetch_by_dbID{
=cut
sub fetch_all_by_Contig_constraint {
sub fetch_all_by_RawContig_constraint {
my ($self, $contig, $constraint, $logic_name) = @_;
unless( defined $contig ) {
......@@ -204,7 +204,7 @@ sub fetch_all_by_Contig_constraint {
the contig from which features should be obtained
Arg [2] : (optional) string $logic_name
the logic name of the type of features to obtain
Example : @fts = $a->fetch_all_by_Contig($contig, 'wall');
Example : @fts = $a->fetch_all_by_RawContig($contig, 'wall');
Description: Returns a list of features created from the database which are
are on the contig defined by $cid If logic name is defined,
only features with an analysis of type $logic_name will be
......@@ -218,31 +218,20 @@ sub fetch_all_by_Contig_constraint {
sub fetch_all_by_RawContig {
my ( $self, $contig, $logic_name ) = @_;
return $self->fetch_all_by_Contig_constraint($contig, '',$logic_name);
}
# old naming convention, replace calls to this.
sub fetch_all_by_Contig{
my ($self, $contig, $logic_name) = @_;
$self->warn( "Please use ...by_RawContig instead of ...by_Contig" );
#fetch by contig id constraint with empty constraint
return $self->fetch_all_by_Contig_constraint($contig, '',$logic_name);
return $self->fetch_all_by_RawContig_constraint($contig, '',$logic_name);
}
=head2 fetch_all_by_Contig_and_score
=head2 fetch_all_by_RawContig_and_score
Arg [1] : Bio::EnsEMBL::RawContig $contig
the contig from which features should be obtained
Arg [2] : (optional) float $score
the lower bound of the score of the features to obtain
Arg [3] : (optional) string $logic_name
the logic name of the type of features to obtain
Example : @fts = $a->fetch_by_Contig_and_score(1, 50.0, 'Swall');
Example : @fts = $a->fetch_by_RawContig_and_score(1, 50.0, 'Swall');
Description: Returns a list of features created from the database which are
are on the contig defined by $cid and which have score greater
are on the contig defined by $cid and which have score greater
than score. If logic name is defined, only features with an
analysis of type $logic_name will be returned.
Returntype : listref of Bio::EnsEMBL::*Feature in contig coordinates
......@@ -251,7 +240,7 @@ sub fetch_all_by_Contig{
=cut
sub fetch_all_by_Contig_and_score{
sub fetch_all_by_RawContig_and_score{
my($self, $contig, $score, $logic_name) = @_;
my $constraint;
......@@ -260,7 +249,7 @@ sub fetch_all_by_Contig_and_score{
$constraint = "score > $score";
}
return $self->fetch_all_by_Contig_constraint($contig, $constraint,
return $self->fetch_all_by_RawContig_constraint($contig, $constraint,
$logic_name);
}
......@@ -357,15 +346,16 @@ sub fetch_all_by_Slice_constraint {
return $self->{'_slice_feature_cache'}{$key}
if $self->{'_slice_feature_cache'}{$key};
my $chr_start = $slice->chr_start();
my $chr_end = $slice->chr_end();
my $slice_start = $slice->chr_start();
my $slice_end = $slice->chr_end();
my $slice_strand = $slice->strand();
my $mapper =
$self->db->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
#get the list of contigs this slice is on
my @cids =
$mapper->list_contig_ids( $slice->chr_name, $chr_start ,$chr_end );
$mapper->list_contig_ids( $slice->chr_name, $slice_start ,$slice_end );
return [] unless scalar(@cids);
......@@ -378,6 +368,7 @@ sub fetch_all_by_Slice_constraint {
$constraint = "contig_id IN ($cid_list)";
}
#for speed the remapping to slice may be done at the time of object creation
my $features =
$self->generic_fetch($constraint, $logic_name, $mapper, $slice);
......@@ -386,10 +377,14 @@ sub fetch_all_by_Slice_constraint {
return $self->{'_slice_feature_cache'}{$key} = $features;
}
#remapping has not been done, we have to do our own
my @out = ();
#convert the features to slice coordinates from raw contig coordinates
my ($feat_start, $feat_end, $feat_strand);
foreach my $f (@$features) {
#since feats were obtained in contig coords, attached seq is a contig
my $contig_id = $f->contig->dbID();
......@@ -398,13 +393,19 @@ sub fetch_all_by_Slice_constraint {
$mapper->fast_to_assembly($contig_id, $f->start(),
$f->end(),$f->strand(),"rawcontig");
# not defined start means gap
# undefined start means gap
next unless defined $start;
# maps to region outside desired area
next if ($start > $chr_end) || ($end <= $chr_start);
next if ($start > $slice_end) || ($end < $slice_start);
#shift the feature start, end and strand in one call
$f->move( $start - $chr_start, $end - $chr_start, $strand );
if($slice_strand == -1) {
$f->move( $slice_end - $end + 1, $slice_end - $start + 1, -$strand );
} else {
$f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
}
$f->contig($slice);
push @out,$f;
......@@ -626,6 +627,69 @@ sub fetch_by_Contig_and_score {
}
=head2 fetch_all_by_Contig
Arg [1] : none
Example : none
Description: DEPRECATED use fetch_all_by_RawContig instead
Returntype : none
Exceptions : none
Caller : none
=cut
sub fetch_all_by_Contig {
my ($self, @args) = @_;
$self->warn("fetch_all_by_Contig has been renamed fetch_all_by_RawContig\n" . caller);
return $self->fetch_all_by_RawContig(@args);
}
=head2 fetch_all_by_Contig_and_score
Arg [1] : none
Example : none
Description: DEPRECATED use fetch_all_by_RawContig_and_score instead
Returntype : none
Exceptions : none
Caller : none
=cut
sub fetch_all_by_Contig_and_score {
my ($self, @args) = @_;
$self->warn("fetch_all_by_Contig_and_score has been renamed fetch_all_by_RawContig_and_score\n" . caller);
return $self->fetch_all_by_RawContig_and_score(@args);
}
=head2 fetch_all_by_Contig_constraint
Arg [1] : none
Example : none
Description: DEPRECATED use fetch_all_by_RawContig_constraint instead
Returntype : none
Exceptions : none
Caller : none
=cut
sub fetch_all_by_Contig_constraint {
my ($self, @args) = @_;
$self->warn("fetch_all_by_Contig_constraint has been renamed fetch_all_by_RawContig_constraint\n" . caller);
return $self->fetch_all_by_RawContig_constraint(@args);
}
=head2 fetch_by_Slice_and_score
Arg [1] : none
......
......@@ -198,32 +198,50 @@ sub _objs_from_sth {
if($slice) {
my ($chr, $start, $end, $strand);
my $slice_start = $slice->chr_start() - 1;
my $slice_name = $slice->name();
my $slice_start = $slice->chr_start();
my $slice_end = $slice->chr_end();
my $slice_strand = $slice->strand();
my $slice_name = $slice->name();
my ($feat_start, $feat_end, $feat_strand);
while($row = shift @$row_cache) {
($dna_align_feature_id, $contig_id, $analysis_id, $contig_start,
$contig_end, $contig_strand, $hit_start, $hit_end, $hit_name,
$hit_strand, $cigar_line, $evalue, $perc_ident, $score) = @$row;
$analysis = $a_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
#convert contig coordinates to assembly coordinates
($chr, $start, $end, $strand) =
$mapper->fast_to_assembly($contig_id, $contig_start,
$contig_end, $contig_strand);
unless(defined $start) {
next;
#if mapped to gap, skip
next unless(defined $start);
#if mapped outside slice region, skip
next if ($start > $slice_end) || ($end < $slice_start);
#convert assembly coordinates to slice coordinates
if($slice_strand == -1) {
$feat_start = $slice_end - $end + 1;
$feat_end = $slice_end - $start + 1;
$feat_strand = -$strand;
} else {
$feat_start = $start - $slice_start + 1;
$feat_end = $end - $slice_start + 1;
$feat_strand = $strand;
}
$analysis = $a_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
push @features, Bio::EnsEMBL::DnaDnaAlignFeature->new_fast(
{'_gsf_tag_hash' => {},
'_gsf_sub_array' => [],
'_parse_h' => {},
'_analysis' => $analysis,
'_gsf_start' => $start - $slice_start,
'_gsf_end' => $end - $slice_start,
'_gsf_strand' => $strand,
'_gsf_start' => $feat_start,
'_gsf_end' => $feat_end,
'_gsf_strand' => $feat_strand,
'_gsf_score' => $score,
'_seqname' => $slice_name,
'_percent_id' => $perc_ident,
......
......@@ -366,7 +366,8 @@ sub fetch_all_by_contig_list{
sub fetch_all_by_Slice {
my ( $self, $slice, $type ) = @_;
my @out;
my @out = ();
#check the cache which uses the slice name as it key
if($self->{'_slice_gene_cache'}{$slice->name()}) {
......@@ -408,7 +409,7 @@ sub fetch_all_by_Slice {
}
#place the results in an LRU cache
#$self->{'_slice_gene_cache'}{$slice->name} = \@out;
return $self->{'_slice_gene_cache'}{$slice->name} = \@out;
return \@out;
}
......@@ -510,25 +511,25 @@ sub fetch_by_Peptide_id {
=cut
sub fetch_by_maximum_DBLink {
my $self = shift;
my $external_id = shift;
my @genes=$self->fetch_by_DBEntry( $external_id );
my $biggest;
my $max=0;
my $size=scalar(@genes);
if ($size > 0) {
foreach my $gene (@genes) {
my $size = (scalar(@{$gene->get_all_Exons}));
if ($size > $max) {
$biggest = $gene;
$max=$size;
}
}
return $biggest;
my $self = shift;
my $external_id = shift;
my $genes = $self->fetch_by_DBEntry( $external_id );
my $biggest;
my $max=0;
my $size=scalar(@genes);
if ($size > 0) {
foreach my $gene (@$genes) {
my $size = (scalar(@{$gene->get_all_Exons}));
if ($size > $max) {
$biggest = $gene;
$max=$size;
}
}
return;
return $biggest;
}
return;
}
......@@ -569,7 +570,7 @@ sub get_stable_entry_info {
}
=head2 fetch_by_DBEntry
=head2 fetch_all_by_DBEntry
Arg [1] : in $external_id
the external identifier for the gene to be obtained
......
......@@ -160,9 +160,13 @@ sub _objs_from_sth {
if($slice) {
my ($chr, $start, $end, $strand);
my $slice_start = $slice->chr_start() - 1;
my $slice_name = $slice->name();
my $slice_start = $slice->chr_start();
my $slice_end = $slice->chr_end();
my $slice_name = $slice->name();
my $slice_strand = $slice->strand();
my($feat_start, $feat_end, $feat_strand);
while($row = shift @$row_cache) {
($protein_align_feature_id, $contig_id, $contig_start, $contig_end,
$analysis_id, $contig_strand, $hit_start, $hit_end, $hit_name,
......@@ -173,10 +177,23 @@ sub _objs_from_sth {
$mapper->fast_to_assembly($contig_id, $contig_start,
$contig_end, $contig_strand);
unless(defined $start) {
next;
#skip if feature maps to gap
next unless(defined $start);
#skip if feature outside of slice area
next if ($start > $slice_end) || ($end < $slice_start);
#convert assembly coordinates to slice coordinates
if($slice_strand == -1) {
$feat_start = $slice_end - $end + 1;
$feat_end = $slice_end - $start + 1;
$feat_strand = -$strand;
} else {
$feat_start = $start - $slice_start + 1;
$feat_end = $end - $slice_start + 1;
$feat_strand = $strand;
}
#use a very fast (hack) constructor - normal object construction is too
#slow for the number of features we are potentially dealing with
push @features, Bio::EnsEMBL::DnaPepAlignFeature->new_fast(
......@@ -184,9 +201,9 @@ sub _objs_from_sth {
'_gsf_sub_array' => [],
'_parse_h' => {},
'_analysis' => $analysis,
'_gsf_start' => $start - $slice_start,
'_gsf_end' => $end - $slice_start,
'_gsf_strand' => $strand,
'_gsf_start' => $feat_start,
'_gsf_end' => $feat_end,
'_gsf_strand' => $feat_strand,
'_gsf_score' => $score,
'_seqname' => $slice_name,
'_percent_id' => $perc_ident,
......
......@@ -16,7 +16,7 @@ Bio::EnsEMBL::DBSQL::SequenceAdaptor - produce sequence strings from locations
=head1 SYNOPSIS
$seq_adptr = $database_adaptor->get_SequenceAdaptor();
$dna = $seq_adptr->fetch_by_Contig_start_end_strand($contig, 1, 1000, -1);
$dna = $seq_adptr->fetch_by_RawContig_start_end_strand($contig, 1, 1000, -1);
=head1 DESCRIPTION
......@@ -98,6 +98,7 @@ sub fetch_by_RawContig_start_end_strand {
AND c.contig_id = ?";
}
#use the dna db if defined
if( defined $self->db()->dnadb() ) {
$sth = $self->db()->dnadb()->prepare( $query );
} else {
......@@ -177,7 +178,7 @@ sub fetch_by_Slice_start_end_strand {
(
$slice->chr_end()-$end+1,
$slice->chr_end()-$start+1,
$strand,
$strand * -1, #have to make strand relative to slice's strand
$slice->chr_name(),
$slice->assembly_type()
);
......
......@@ -195,7 +195,6 @@ sub fetch_by_fpc_name {
Returns : Slice object
Args : clone id, [context size in bp]
=cut
sub fetch_by_clone_accession{
......
......@@ -86,14 +86,15 @@ sub new {
$self->chr_name($chr);
$self->chr_start($start);
$self->chr_end($end);
$self->id("$chr.$start-$end");
#set strand to a default of 1 if it is not set
if ( defined $strand) {
$self->strand($strand);
if(!defined $strand) {
$strand = 1; #default slice strand is 1
} else {
$self->strand('1');
unless($strand == 1 || $strand == -1) {
$self->throw("Slice strand must be either -1 or 1 not [$strand].");
}
}
$self->strand($strand);
} else {
$self->strand( 1 );
$self->chr_start( 1 );
......@@ -135,7 +136,6 @@ sub adaptor{
$self->{'adaptor'} = $value;
}
return $self->{'adaptor'};
}
......@@ -185,12 +185,20 @@ sub dbID {
sub name {
my $self = shift;
return join( '', $self->chr_name(), ':', $self->chr_start(),
'-', $self->chr_end());
my $string = join '', $self->chr_name, ':',
$self->chr_start, '-', $self->chr_end();
if($self->strand == -1) {
return "reverse($string)";
}
return $string;
}
=head2 id
Arg [1] : none
......@@ -231,11 +239,40 @@ sub length {
=head2 invert
Arg [1] : none
Example : $slice->invert;
Description: Reverses the strandedness of this slice. If the slice is
on the positive strand it will be placed on the negative
strand of the same region. chr_start and chr_end remain
unchanged, but $slice->seq will return the reverse complement
of the original slice sequence. Genes and Features will also
be returned with reverse strandedness and start and end
coordinates relative to the opposite end of the slice.
Returntype : none
Exceptions : none
Caller : general
=cut
sub invert {
my $self = shift;
my $strand = $self->strand();
$self->strand(-$strand);
}
=head2 seq
Args : none
Function : returns the entire sequence string for this Slice
needs the adaptor to be set.
Function : Returns the entire sequence string for this Slice. This method
will return the reverse complement of the sequence of another
slice on the same region but on the opposite strand.
Note that the slice needs the adaptor to be set to obtain the
sequence.
Returntype: txt
Exceptions: none
Caller : general
......@@ -245,9 +282,7 @@ sub length {
sub seq {
my $self = shift;
my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
my $seq = $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, -1, 1 );
return $seq;
return $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, -1, 1 );
}
......@@ -258,7 +293,9 @@ sub seq {
relative to start of slice, which is 1.
Arg 2 : int $endBasePair
relative to start of slice.
Arg 3 : int $strand
Arg 3 : (optional) int $strand
The strand of the slice to obtain sequence from. Default value is
1.
Function : returns string of dna sequence
Returntype: txt
Exceptions: end should be at least as big as start
......@@ -274,9 +311,10 @@ sub subseq {
$self->throw("End coord is less then start coord");
}
if ( !defined $strand || ( $strand != -1 && $strand != 1 )) {
# $self->throw("Incorrect strand information set to call on Slice subseq.");
$strand = 1;
$strand = 1 unless(defined $strand);
if ( $strand != -1 && $strand != 1 ) {
$self->throw("Invalid strand [$strand] in call to Slice::subseq.");
}
my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment