From 4312cdc1025633cf533857a594e9bb344d81cfab Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Fri, 12 Nov 2010 09:54:09 +0000 Subject: [PATCH] Allow multiple assembly exceptions and store synonyms of slice (seq region synonym) if they exist --- modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm | 82 +++++++++++++++++------ 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm index 98bd045b33..3ce885ff4c 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm @@ -290,6 +290,20 @@ sub fetch_by_region { if ( $sth->rows() == 0 ) { $sth->finish(); + + # try synonyms + my $syn_sql_sth = $self->prepare("select s.name from seq_region s, seq_region_synonym ss where s.seq_region_id = ss.seq_region_id and ss.synonym = ?"); + $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR); + $syn_sql_sth->execute(); + my $new_name; + $syn_sql_sth->bind_columns( \$new_name); + if($syn_sql_sth->fetch){ + $syn_sql_sth->finish; + return $self->fetch_by_region($coord_system_name, $new_name, $start, $end, $strand, $version, $no_fuzz); + } + $syn_sql_sth->finish; + + if ($no_fuzz) { return undef } # Do fuzzy matching, assuming that we are just missing a version @@ -1374,40 +1388,62 @@ sub fetch_normalized_slice_projection { } my @syms; - if( @haps > 1 ) { + if( @haps >= 1 ) { my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps; - throw( "More than one HAP region not supported yet" ); - } elsif( @haps == 1 ) { - my $hap = $haps[0]; - my $seq_reg_slice = $self->fetch_by_seq_region_id($slice_seq_region_id); - my $exc_slice = $self->fetch_by_seq_region_id( $hap->[2] ); + my $count =0; + my $chr_start = 1; + my $hap_start = 1; + my $last = 0; - # - # lengths of haplotype and reference in db may be different - # we want to use the maximum possible length for the mapping - # between the two systems - # + my $seq_reg_slice = $self->fetch_by_seq_region_id($slice_seq_region_id); + my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] ); my $len1 = $seq_reg_slice->length(); my $len2 = $exc_slice->length(); my $max_len = ($len1 > $len2) ? $len1 : $len2; - #the inserted region can differ in length, but mapped sections - #need to be same lengths - my $diff = $hap->[4] - $hap->[1]; + while($count <= scalar(@sort_haps) and !$last){ + my $chr_end; + my $hap_end; + if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){ + $hap_end = $sort_haps[$count][0]-1; + $chr_end = $sort_haps[$count][3]-1 + } + else{ + $last = 1; + $hap_end = $len1; + $chr_end = $len2; + my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start); + if($diff > 0){ + push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] ); + } + elsif($diff < 0){ + push( @syms, [ $hap_start, $hap_end+$diff, $sort_haps[0][2], $chr_start, $chr_end] ); + } + else{ + push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] ); + } + next; + } + if($hap_end and $hap_start < $len1){ # if hap at start or end of chromosome + push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] ); + } + $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2; + $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2; + $count++; + } + - # we want the region of the haplotype INVERTED - push( @syms, [ 1, $hap->[0]-1, $hap->[2], 1, $hap->[3] - 1 ] ); - push( @syms, [ $hap->[1]+1, $max_len - $diff, - $hap->[2], $hap->[4] + 1, $max_len ] ); } + # for now haps and pars should not be both there, but in theory we # could handle it here by cleverly merging the pars into the existing syms, # for now just: push( @syms, @pars ); my $mapper = Bio::EnsEMBL::Mapper->new( "sym", "org" ); + my $count = 0; for my $sym ( @syms ) { $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1], 1, $sym->[2], $sym->[3], $sym->[4] ); @@ -1581,6 +1617,15 @@ sub store { $seq_adaptor->store($seq_region_id, $$seqref); } + #synonyms + if(defined($slice->{'synonym'})){ + foreach my $syn (@{$slice->{'synonym'}} ){ + $syn->seq_region_id($seq_region_id); # set the seq_region_id + $syn->adaptor->store($syn); + } + } + + $slice->adaptor($self); return $seq_region_id; @@ -1819,7 +1864,6 @@ LSQL } ## end sub cache_toplevel_seq_mappings - ##################################### # sub DEPRECATED METHODs ##################################### -- GitLab