From 506a9249117ca9f8587d90cde9533e1a2d7a3aba Mon Sep 17 00:00:00 2001 From: Arne Stabenau <stabenau@sanger.ac.uk> Date: Thu, 21 Nov 2002 18:06:02 +0000 Subject: [PATCH] nt_contigs as slices supported --- modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm | 78 ++++++++++++++++++----- modules/Bio/EnsEMBL/Slice.pm | 60 +++++++++++++++-- modules/t/slice.t | 14 +++- modules/t/sliceAdaptor.t | 18 +++++- 4 files changed, 142 insertions(+), 28 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm index bada5f9fbf..5a07d51091 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm @@ -163,36 +163,80 @@ sub fetch_by_contig_name { sub fetch_by_fpc_name { my ($self,$fpc_name) = @_; + + my( $p, $f, $l ) = caller; + $self->warn( "$f:$l calls deprecated method fetch_by_fpc_name. Please use fetch_by_supercontig_name instead" ); - my $type = $self->db->assembly_type(); + $self->fetch_by_supercontig_name( $fpc_name ); +} - my $sth = $self->db->prepare(" +sub fetch_by_supercontig_name { + my ($self,$supercontig_name) = @_; + + my $assembly_type = $self->db->assembly_type(); + + my $sth = $self->db->prepare(" SELECT chr.name, a.superctg_ori, MIN(a.chr_start), MAX(a.chr_end) FROM assembly a, chromosome chr - WHERE superctg_name = '$fpc_name' - AND type = '$type' + WHERE superctg_name = ? + AND type = ? 
AND chr.chromosome_id = a.chromosome_id GROUP by superctg_name "); - $sth->execute; + $sth->execute( $supercontig_name, $assembly_type ); + + my ($chr, $strand, $slice_start, $slice_end) = $sth->fetchrow_array; + + my $slice; + + $slice = new Bio::EnsEMBL::Slice + ( + -chr_name => $chr, + -chr_start =>$slice_start, + -chr_end => $slice_end, + -strand => $strand, + -assembly_type => $assembly_type + ); + + return $slice; +} - my ($chr, $strand, $slice_start, $slice_end) = $sth->fetchrow_array; - my $slice; +=head2 list_overlapping_supercontigs - $slice = new Bio::EnsEMBL::Slice - ( - -chr_name => $chr, - -chr_start =>$slice_start, - -chr_end => $slice_end, - -strand => $strand, - -assembly_type => $type - ); + Arg [1] : Bio::EnsEMBL::Slice $slice + overlapping given Slice + Example : + Description: return the names of the supercontigs that overlap given Slice. + Returntype : listref string + Exceptions : none + Caller : general + +=cut - return $slice; -} +sub list_overlapping_supercontigs { + my ($self,$slice) = @_; + my $sth = $self->db->prepare( " + SELECT DISTINCT superctg_name + FROM assembly a, chromosome c + WHERE c.chromosome_id = a.chromosome_id + AND c.name = ? + AND a.type = ? + AND a.chr_end >= ? + AND a.chr_start <= ? + " ); + $sth->execute( $slice->chr_name(), $slice->assembly_type(), + $slice->chr_start(), $slice->chr_end() ); + + my $result = []; + while( my $aref = $sth->fetchrow_arrayref() ) { + push( @$result, $aref->[0] ); + } + + return $result; +} =head2 fetch_by_clone_accession diff --git a/modules/Bio/EnsEMBL/Slice.pm b/modules/Bio/EnsEMBL/Slice.pm index 48690ac754..61204a9c71 100644 --- a/modules/Bio/EnsEMBL/Slice.pm +++ b/modules/Bio/EnsEMBL/Slice.pm @@ -192,7 +192,7 @@ sub dbID { =head2 name - Arg [1] : none + Arg [1] : optional string $name Example : $name = $slice->name(); Description: Returns the name of this slice. The name is formatted as a the following string: "$chr_name.$chr_start-$chr_end". 
@@ -201,6 +201,7 @@ sub dbID { can also act as a hash value. This is similar to the name method in RawContig so for exons which can have either type of sequence attached it provides a more common interface. + You can as well set the slicename to something like "NT_110023" Returntype : string Exceptions : none Caller : general @@ -208,18 +209,63 @@ sub dbID { =cut sub name { - my $self = shift; + my ( $self, $arg ) = @_; + + if( defined $arg ) { + $self->{name} = $arg; + } elsif(!defined $self->{name}) { - my $string = join '', $self->chr_name, '.', - $self->chr_start, '-', $self->chr_end(); + my $string = join '', $self->chr_name, '.', + $self->chr_start, '-', $self->chr_end(); - if($self->strand == -1) { - return "reverse($string)"; + + if($self->strand == -1) { + $self->{name} = "reverse($string)"; + } else { + $self->{name} = $string; + } } - return $string; + return $self->{name}; } +=head2 get_all_supercontig_Slices + + Arg [1] : none + Example : none + Description: Returns Slices that represent overlapping supercontigs. + Coordinates inside those slices are supercontig coordinates. + You can transfer features to these slices' coordinate system with + the normal transform call. The returned slices have their names + set to the supercontig names. 
+ Returntype : listref Bio::EnsEMBL::Slice + Exceptions : none + Caller : none + +=cut + + +sub get_all_supercontig_Slices { + my $self = shift; + my $result = []; + + if( $self->adaptor() ) { + my $superctg_names = $self->adaptor()->list_overlapping_supercontigs( $self ); + + for my $name ( @$superctg_names ) { + my $slice; + $slice = $self->adaptor()->fetch_by_supercontig_name( $name ); + $slice->name( $name ); + push( @$result, $slice ); + } + } else { + $self->warn( "Slice needs to be attached to a database to get supercontigs" ); + } + + return $result; +} + + diff --git a/modules/t/slice.t b/modules/t/slice.t index 21429fdb97..ddd6626761 100644 --- a/modules/t/slice.t +++ b/modules/t/slice.t @@ -5,13 +5,15 @@ use lib 't'; BEGIN { $| = 1; use Test; - plan tests => 46; + plan tests => 48; } +use TestUtils qw( debug ); use MultiTestDB; use Bio::EnsEMBL::Slice; +our $verbose= 0; # #1 TEST - Slice Compiles @@ -270,6 +272,16 @@ $softmasked_seq = $seq = undef; ok(scalar @{$slice->get_tiling_path}); +my $super_slices = $slice->get_all_supercontig_Slices(); +# +# 47-48 get_all_supercontig_Slices() +# +debug( "Supercontig starts at ".$super_slices->[0]->chr_start() ); + +ok( $super_slices->[0]->chr_start() == 29591966 ); + +debug( "Supercontig name ".$super_slices->[0]->name() ); +ok( $super_slices->[0]->name() eq "NT_028392" ); diff --git a/modules/t/sliceAdaptor.t b/modules/t/sliceAdaptor.t index 0d85ce760e..e1cfb80569 100644 --- a/modules/t/sliceAdaptor.t +++ b/modules/t/sliceAdaptor.t @@ -5,12 +5,14 @@ use warnings; BEGIN { $| = 1; use Test; - plan tests => 26; + plan tests => 27; } use MultiTestDB; use Bio::EnsEMBL::DBSQL::SliceAdaptor; -use TestUtils qw(test_getter_setter); +use TestUtils qw(test_getter_setter debug); + +our $verbose = 1; my ($CHR, $START, $END, $FLANKING) = ("20", 30_252_000, 31_252_001, 1000); @@ -68,10 +70,12 @@ ok($new_slice->chr_end == $slice->chr_end + $FLANKING); # 12-13 fetch_by_fpc_name # my $fpc_name = 'NT_011387'; -$slice = 
$slice_adaptor->fetch_by_fpc_name($fpc_name); +$slice = $slice_adaptor->fetch_by_supercontig_name($fpc_name); ok($new_slice->chr_start); ok($new_slice->chr_end); + + # # 14 - 15 fetch_by_clone_accession # @@ -145,4 +149,12 @@ my $chromo = $db->get_ChromosomeAdaptor->fetch_by_chr_name($CHR); ok($chromo->length eq $slice->chr_end); +$slice = $slice_adaptor->fetch_by_chr_start_end("20", 29_252_000, 31_252_001 ); +my $name_list = $slice_adaptor->list_overlapping_supercontigs( $slice ); + +for my $name ( @$name_list ) { + debug( "Overlapping supercontig ".$name ); +} + +ok( $name_list->[0] eq "NT_028392" ); -- GitLab