Commit cbfc45d3 authored by Ewan Birney's avatar Ewan Birney
Browse files

commits for slices for laura - laura - clone.t is now failing. Unclear if this is me or you

parent 69379ed4
......@@ -174,319 +174,6 @@ sub cigar_string {
}
=head2 _generic_parse_cigar
Arg 1 : int query_unit,
either 1 (peptide) or 3 (dna)
Arg 2 : int hit_unit,
either 1 (peptide) or 3 (dna)
a DNA,DNA alignment should use 1,1 for the query_unit and hit_unit
Function : Converts the cigar_string contained by the module into
an array of ungapped Bio::EnsEMBL::FeaturePair.
See sub cigar_string for an explanation of what that is.
Returntype: list of Bio::EnsEMBL::FeaturePairs
Exceptions: Mainly internal, eg, no cigar line, wrong arguments or
miswritten query_unit or hit_unit
Caller : _parse_cigar in derived class
=cut
sub _generic_parse_cigar2 {
my ( $self, $query_unit, $hit_unit ) = @_;
my $string = $self->cigar_string;
if (!defined($string)) {
$self->throw("No cigar string defined in object. This should be caught by the cigar_string method and never happen");
}
my @pieces = split(/:/,$string);
my @features;
foreach my $piece (@pieces) {
my $start1;
my $start2;
my $length;
if ($piece =~ /(\S+)\,(\S+)\,(\S+)/) {
$start1 = $1;
$start2 = $2;
$length = $3;
} else {
$self->throw("Can't parse cigar string element [$piece]. Invalid format. Should be start,end,length*strand");
}
my $strand = 1;
if ($length == 0) {
$self->throw("Length of piece is 0 - impossible");
}
if ($length < 0) {
$strand = -1;
}
$length = abs($length);
my $feature1 = new Bio::EnsEMBL::SeqFeature();
$feature1->start($start1);
$feature1->end ($start1 + $length - 1);
$feature1->strand($strand);
$feature1->score($self->score);
$feature1->seqname($self->seqname);
$feature1->phase($self->phase);
$feature1->p_value($self->p_value);
$feature1->percent_id($self->percent_id);
my $feature2 = new Bio::EnsEMBL::SeqFeature();
my $mapped_length;
# explicit if statements to avoid rounding problems
# and make sure we have sane coordinate systems
if( $query_unit == 1 && $hit_unit == 3 ) {
$mapped_length = $length*3;
} elsif( $query_unit == 3 && $hit_unit == 1 ) {
$mapped_length = $length / 3;
} elsif ( $query_unit == 1 && $hit_unit == 1 ) {
$mapped_length = $length;
} else {
$self->throw("Internal error $query_unit $hit_unit, currently only allowing 1 or 3 ");
}
if( int($mapped_length) != $mapped_length ) {
$self->throw("Internal error with mismapped length of hit, query $query_unit, hit $hit_unit, length $length");
}
if ($strand == 1) {
$feature2->start($start2);
$feature2->end ($start2 + $strand*($mapped_length - 1));
} else {
$feature2->end ($start2);
$feature2->start($start2 + $strand*($mapped_length - 1));
}
$feature2->strand($strand);
$feature2->score($self->score);
$feature2->seqname($self->hseqname);
$feature2->phase($self->phase);
$feature2->p_value($self->p_value);
$feature2->percent_id($self->percent_id);
my $fp = new Bio::EnsEMBL::FeaturePair(-feature1 => $feature1,
-feature2 => $feature2);
$fp->analysis($self->analysis);
push(@features,$fp);
}
return @features;
}
=head2 _generic_parse_features
Arg 1 : listref Bio::EnsEMBL::FeaturePair $features
Arg 2 : int query_unit,
either 1 (peptide) or 3 (dna)
Arg 3 : int hit_unit,
either 1 (peptide) or 3 (dna)
a DNA,DNA alignment should use 1,1 for the query_unit and hit_unit
Usage : Internal method - not used.
Function : Converts an array of FeaturePairs into a gapped feature with
a cigar string describing the
See sub cigar_string for an explanation of what that is.
Exception: If the argument passed is not an array reference
All the features must have arisen from the same source
i.e. a blast HSP or some other alignment. Thus
exceptions are thrown when the scores,percent ids,p_values
seqnames , hseqnames and strands differ amongst the input
features.
All the features must not overlap in order to provide a
sane gapped alignment. An exception is thrown if they do.
If any element of the array is not a Bio::EnsEMBL::FeaturePair
If there are no elements in the array
If the hit length is not exactly 3 times the query length
Caller : Called internally to the module by the constructor
=cut
sub _generic_parse_features2 {
my ($self,$features, $query_unit, $hit_unit ) = @_;
if (ref($features) ne "ARRAY") {
$self->throw("features must be an array reference not a [" . ref($features) . "]");
}
#print ::LOG "Enter new ",ref( $self ), " with ",scalar( @$features ), " features.\n";
my @f = sort {$a->start <=> $b->start} @$features;
if (scalar(@f) == 0) {
$self->throw("No features in the array to parse");
}
my $hstrand = $f[0]->hstrand;
my $strand = $f[0]->strand;
my $name = $f[0]->seqname;
my $hname = $f[0]->hseqname;
my $score = $f[0]->score;
my $percent = $f[0]->percent_id;
my $pvalue = $f[0]->p_value;
my $analysis = $f[0]->analysis;
my $phase = $f[0]->phase;
# implicit starnd 1 for peptide sequences
( defined $strand ) || ( $strand = 1 );
( defined $hstrand ) || ( $hstrand = 1 );
my $ori = $strand * $hstrand;
my $prev1;
my $prev2;
my $string;
my $f1start = $f[0]->start;
my $f1end = $f[$#f]->end;
my $f2start;
my $f2end;
if ( $hstrand == 1 ) {
$f2start = $f[0]->hstart;
$f2end = $f[$#f]->hend;
} else {
$f2end = $f[0]->hend;
$f2start = $f[$#f]->hstart;
}
foreach my $f (@f) {
if (!$f->isa("Bio::EnsEMBL::FeaturePair")) {
$self->throw("Array element [$f] is not a Bio::EnsEMBL::FeaturePair");
}
##print STDERR "Processing " . $f->gffstring . "\n";
if( defined $f->hstrand() ) {
if ($f->hstrand != $hstrand) {
$self->throw("Inconsistent hstrands in feature array");
}
}
if( defined $f->strand() ) {
if ($f->strand != $strand) {
$self->throw("Inconsistent strands in feature array");
}
}
if ($name ne $f->seqname) {
$self->throw("Inconsistent names in feature array [$name - " . $f->seqname . "]");
}
if ($hname ne $f->hseqname) {
$self->throw("Inconsistent names in feature array [$hname - " . $f->hseqname . "]");
}
if ($score ne $f->score) {
$self->throw("Inconsisent scores in feature array [$score - " . $f->score . "]");
}
if ($percent ne $f->percent_id) {
$self->throw("Inconsistent pids in feature array [$percent - " . $f->percent_id . "]");
}
my $start1 = $f->start;
my $start2 = $f->hstart();
if (defined($prev2)) {
if ( $ori == 1 ) {
if ($f->hstart < $prev2) {
$self->throw("Inconsistent coordinates in feature array " . $f->hstart . " < $prev2");
}
} else {
if ($f->hend > $prev2) {
$self->throw("Inconsistent coordinates in feature array " . $f->hend . " > $prev2");
}
}
}
my $length = ($f->end - $f->start + 1);
my $hlength = ($f->hend - $f->hstart + 1);
# using multiplication to avoid rounding errors, hence the
# switch from query to hit for the ratios
if( $length * $hit_unit != $hlength * $query_unit ) {
$self->throw( "Feature lengths not comparable Lengths:".$length." ".$hlength." Ratios:".$query_unit." ". $hit_unit );
}
# now make length have ori sign
$length *= $ori;
$string = $string . $start1 . "," . $start2 . "," . $length . ":";
if( $ori == 1 ) {
$prev2 = $f->hend;
} else {
$prev2 = $f->hstart;
}
}
$string =~ s/:$//;
my $feature1 = new Bio::EnsEMBL::SeqFeature();
$feature1->start($f1start);
$feature1->end ($f1end);
$feature1->strand($strand);
$feature1->score($score);
$feature1->percent_id($percent);
$feature1->p_value($pvalue);
$feature1->seqname($name);
$feature1->percent_id($percent);
$feature1->p_value($pvalue);
$feature1->phase($phase);
$feature1->analysis($analysis);
my $feature2 = new Bio::EnsEMBL::SeqFeature();
$feature2->start($f2start);
$feature2->end ($f2end);
$feature2->strand($hstrand);
$feature2->score($score);
$feature2->seqname($hname);
$feature2->percent_id($percent);
$feature2->p_value($pvalue);
$feature2->phase($phase);
$feature2->analysis($analysis);
$self->feature1($feature1);
$self->feature2($feature2);
$self->cigar_string($string);
#print ::LOG "Exit with cigar $string.\n";
}
=head2 ungapped_features
Arg : None.
......@@ -516,7 +203,7 @@ sub ungapped_features {
}
=head2 _generic_parse_cigar2
=head2 _generic_parse_cigar
Arg 1 : int query_unit,
either 1 (peptide) or 3 (dna)
......
......@@ -118,6 +118,7 @@ sub register_region{
$self->throw("$assmapper is not a Bio::EnsEMBL::AssemblyMapper")
unless $assmapper->isa("Bio::EnsEMBL::AssemblyMapper");
# This is to reassure people that I've got this piece of
# SQL correct ;) EB
#
......@@ -132,8 +133,12 @@ sub register_region{
# SCP: Those not clauses are a bit confusing, so I've
# ditched them :-) "NOT a > b" equiv to "a <= b"
# No. SCP - not right! The NOT has a bracket'd AND and so
# one can't just expand it like this EB. Replaced it and
# if *anyone* wants to change this, talk to me or James G first!
my $sth = $self->prepare(qq{
my $select = qq{
select
ass.contig_start,
ass.contig_end,
......@@ -148,24 +153,28 @@ sub register_region{
where
chr.name = '$chr_name' and
ass.chromosome_id = chr.chromosome_id and
$end >= ass.chr_start and
$start <= ass.chr_end and
NOT (
$start >= ass.chr_end and
$end <= ass.chr_start) and
ass.type = '$type'
});
};
#print STDERR "Going to select $select\n";
my $sth = $self->prepare($select);
$sth->execute();
while( my $arrayref = $sth->fetchrow_arrayref ) {
my ($contig_start,$contig_end,$contig_id,$contig_ori,$chr_name,$chr_start,$chr_end) = @$arrayref;
if( $assmapper->_have_registered_contig($contig_id) == 0 ) {
$assmapper->_register_contig($contig_id);
$assmapper->_mapper->add_map_coordinates(
$contig_id, $contig_start, $contig_end, $contig_ori,
$chr_name, $chr_start, $chr_end
);
}
my ($contig_start,$contig_end,$contig_id,$contig_ori,$chr_name,$chr_start,$chr_end) = @$arrayref;
if( $assmapper->_have_registered_contig($contig_id) == 0 ) {
$assmapper->_register_contig($contig_id);
$assmapper->_mapper->add_map_coordinates(
$contig_id, $contig_start, $contig_end, $contig_ori,
$chr_name, $chr_start, $chr_end
);
}
}
}
......
......@@ -2227,82 +2227,6 @@ sub perl_only_contigs{
}
=head2 delete_Clone
Title : delete_Clone
Usage : $obj->delete_Clone($clone_id)
Function: Deletes clone, including contigs, but not its genes
Example :
Returns :
Args :
=cut
sub delete_Clone{
my ($self,$clone_id) = @_;
#$self->warn("Obj->delete_Clone is a deprecated method!
#Calling Clone->delete instead!");
(ref($clone_id)) && $self->throw ("Passing an object reference instead of a variable\n");
my $clone = new Bio::EnsEMBL::DBSQL::Clone( -id => $clone_id,
-dbobj => $self );
return $clone->delete();
}
=head2 cloneid_to_geneid
Title : cloneid_to_geneid
Usage :
Function:
Example :
Returns :
Args :
=cut
sub cloneid_to_geneid{
my ($self,$cloneid) = @_;
$self->warn("Obj->cloneid_to_geneid is a deprecated method!
Calling Clone->get_all_geneid instead!");
(ref($cloneid)) && $self->throw ("Passing an object reference instead of a variable!\n");
my $clone = new Bio::EnsEMBL::DBSQL::Clone( -id => $cloneid,
-dbobj => $self );
return $clone->get_all_my_geneid();
}
=head2 gene_Obj
Title : gene_Obj
Usage : my $geneobj = $db->gene_Obj
Function: Returns the gene object database handle
Example :
Returns : Bio::EnsEMBL::DB::Gene_ObjI
Args :
=cut
sub gene_Obj {
my( $self ) = @_;
my( $go );
unless ($go = $self->{'_gene_obj'}) {
require Bio::EnsEMBL::DBSQL::Gene_Obj;
$go = Bio::EnsEMBL::DBSQL::Gene_Obj->new($self);
$self->{'_gene_obj'} = $go;
}
return $go;
}
sub get_GeneAdaptor {
my( $self ) = @_;
......@@ -2401,6 +2325,18 @@ sub get_RawContigAdaptor {
}
sub get_SliceAdaptor {
my( $self ) = @_;
my( $rca );
unless ($rca = $self->{'_slice_adaptor'}) {
require Bio::EnsEMBL::DBSQL::SliceAdaptor;
$a = Bio::EnsEMBL::DBSQL::SliceAdaptor->new($self);
$self->{'_slice_adaptor'} = $a;
}
return $a;
}
=head2 get_AnalysisAdaptor
Title : get_AnalysisAdaptor
......
......@@ -91,22 +91,8 @@ sub fetch_by_dbID{
}
my $contig = $self->db->get_RawContigAdaptor->fetch_by_dbID($contig_id);
my $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair();;
$out->start($start);
$out->end($end);
$out->strand($strand);
$out->score($score);
$out->hstart($hstart);
$out->hend($hend);
$out->hstrand($hstrand);
$out->hseqname($hname);
$out->cigar($cigar);
$out->analysis($self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id));
$out->seqname($contig->id);
$out->attach_seq($contig->seq);
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id);
my $out= $self->_new_feature($start,$end,$strand,$score,$hstart,$hend,$hstrand,$hname,$cigar,$analysis,$contig->name,$contig->seq);
return $out;
......@@ -148,25 +134,15 @@ sub fetch_by_contig_id{
}
my $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair();;
$out->start($start);
$out->end($end);
$out->strand($strand);
$out->score($score);
$out->hstart($hstart);
$out->hend($hend);
$out->hstrand($hstrand);
$out->hseqname($hname);
$out->cigar($cigar);
my $out= $self->_new_feature($start,$end,$strand,$score,$hstart,$hend,$hstrand,$hname,$cigar,$ana{$analysis_id},$contig->name,$contig->seq);
$out->analysis($ana{$analysis_id});
$out->seqname($contig->name);
$out->attach_seq($contig->seq);
push(@f,$out);
}
return @f;
}
sub fetch_by_contig_id_and_logic_name{
my($self, $cid, $logic_name) = @_;
......@@ -280,6 +256,13 @@ sub fetch_by_contig_id_and_dbname{
}
sub fetch_by_Slice_and_score {
my ($self,$slice,$score) = @_;
$self->warn("*** CODE GOING TO BE FIXED - RETURNING 0***");
return ();
}
=head2 fetch_by_assembly_location
Title : fetch_by_assembly_location
......@@ -299,6 +282,10 @@ sub fetch_by_assembly_location{
$self->throw("Assembly location must be start,end,chr,type");
}
$self->warn("*** CODE GOING TO BE FIXED - RETURNING 0***");
return ();
if( $start !~ /^\d/ || $end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $start,$end (have you typed the location in the right way around - start,end,chromosome,type");
}
......@@ -805,6 +792,38 @@ sub fetch_featurepair_list_by_assembly_location_nad_logic_name{
}
sub _new_feature {
my ($self,$start,$end,$strand,$score,$hstart,$hend,$hstrand,$hseqname,$cigar,$analysis,$seqname,$seq) = @_;
if( !defined $seq ) {
$self->throw("Internal error - wrong number of arguments to new_feature");
}
my $f1 = Bio::EnsEMBL::SeqFeature->new();
my $f2 = Bio::EnsEMBL::SeqFeature->new();
$f1->start($start);
$f1->end($end);
$f1->strand($strand);
$f1->score($score);
$f1->seqname($seqname);
$f1->attach_seq($seq);
$f2->start($hstart);
$f2->end($hend);
$f2->strand($hstrand);
$f2->seqname($hseqname);
$f1->analysis($analysis);
$f2->analysis($analysis);