SliceAdaptor.pm 16.7 KB
Newer Older
1 2

#
Simon Potter's avatar
pod  
Simon Potter committed
3
# Ensembl module for Bio::EnsEMBL::DBSQL::SliceAdaptor
4 5 6 7 8 9 10 11 12 13 14
#
# Cared for by Ewan Birney <ensembl-dev@ebi.ac.uk>
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

15
Bio::EnsEMBL::DBSQL::SliceAdaptor - Adaptors for slices
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45

=head1 SYNOPSIS
  



=head1 DESCRIPTION

Factory for getting out slices of assemblies. WebSlice is the highly
accelerated version for the web site.

=head1 AUTHOR - Ewan Birney

This module is part of the Ensembl project http://www.ensembl.org

Email ensembl-dev@ebi.ac.uk

Describe contact details here

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...


46
package Bio::EnsEMBL::DBSQL::SliceAdaptor;
47 48 49
use vars qw(@ISA);
use strict;

50 51

# Object preamble - inherits from Bio::EnsEMBL::DBSQL::BaseAdaptor
52 53
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Slice;
54
use Bio::EnsEMBL::DBSQL::DBAdaptor;
55

56

57
@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
58 59


Graham McVicker's avatar
Graham McVicker committed
60
# new is inherited from BaseAdaptor
61

62

63

64
=head2 fetch_by_chr_start_end
65

66 67 68 69 70 71 72 73 74 75 76 77 78
  Arg [1]    : string $chr
               the name of the chromosome to obtain a slice for
  Arg [2]    : int $start
               the start basepair of the slice to obtain in chromosomal 
               coordinates
  Arg [3]    : int $end 
               the end basepair of the slice to obtain in chromosomal 
               coordinates
  Example    : $slice = $slice_adaptor->fetch_by_chr_start_end();
  Description: Creates a slice object on the given chromosome and coordinates.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $chr is empty, if $start or $end is undefined,
               or if $start is greater than $end
  Caller     : general
79 80 81

=cut

Graham McVicker's avatar
Graham McVicker committed
82
sub fetch_by_chr_start_end {
    my ($self, $chr, $start, $end) = @_;

    # Argument checks run in a fixed order (chromosome, end, start, then
    # ordering) so the first failing check decides which message is thrown.
    $self->throw("chromosome name argument must be defined and not ''")
        if !$chr;

    # Only definedness is tested for the coordinates, so 0 is let through.
    $self->throw("end argument must be defined\n")
        if !defined $end;
    $self->throw("start argument must be defined\n")
        if !defined $start;

    # Equal start and end (a one-base slice) is accepted; only start > end
    # is rejected.
    $self->throw("start must be less than end: parameters $chr:$start:$end")
        if $start > $end;

    # The slice is always built on the assembly type configured on the
    # database connection.
    my $type = $self->db->assembly_type();

    my $slice = Bio::EnsEMBL::Slice->new(
        -chr_name      => $chr,
        -chr_start     => $start,
        -chr_end       => $end,
        -assembly_type => $type,
        -adaptor       => $self
    );

    return $slice;
}



117
=head2 fetch_by_contig_name
118

Graham McVicker's avatar
Graham McVicker committed
119 120 121 122 123 124 125 126 127 128 129
  Arg [1]    : string $name
               the name of the contig to obtain a slice for
  Arg [2]    : (optional) int $size
               the size of the flanking regions to obtain (aka context size)
  Example    : $slc = $slc_adaptor->fetch_by_contig_name('AB000878.1.1.33983');
  Description: Creates a slice object around the specified contig.  
               If a context size is given, the slice is extended by that 
               number of basepairs on either side of the contig.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
130 131 132

=cut

133
sub fetch_by_contig_name {
    my ($self, $name, $size) = @_;

    # A missing context size means no flanking sequence.
    $size = 0 unless defined $size;

    # Chromosomal position of the contig in the current assembly.
    my ($chr_name, $contig_start, $contig_end) =
        $self->_get_chr_start_end_of_contig($name);

    # Widen by the context size on both sides; the start is clamped to the
    # first base, the end is passed on unclamped.
    my $slice_start = $contig_start - $size;
    my $slice_end   = $contig_end + $size;
    $slice_start = 1 if $slice_start < 1;

    return $self->fetch_by_chr_start_end($chr_name, $slice_start, $slice_end);
}



Graham McVicker's avatar
Graham McVicker committed
152
=head2 fetch_by_fpc_name
Graham McVicker's avatar
Graham McVicker committed
153

Graham McVicker's avatar
Graham McVicker committed
154 155 156 157 158 159 160
  Arg [1]    : string $fpc_name
  Example    : my $slice = $slice_adaptor->fetch_by_fpc_name('NT_004321');
  Description: Creates a Slice on the region of the assembly where 
               the specified FPC (super) contig lies.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
Graham McVicker's avatar
Graham McVicker committed
161 162 163 164 165

=cut

sub fetch_by_fpc_name {
    my ($self, $fpc_name) = @_;

    # Deprecated alias: report the file and line of the calling code so the
    # remaining users of the old name can be found, then delegate.
    my (undef, $file, $line) = caller;
    $self->warn( "$file:$line calls deprecated method fetch_by_fpc_name. Please use fetch_by_supercontig_name instead" );

    return $self->fetch_by_supercontig_name($fpc_name);
}
Graham McVicker's avatar
Graham McVicker committed
172

Arne Stabenau's avatar
Arne Stabenau committed
173 174 175 176 177 178
=head2 fetch_by_supercontig_name

  Arg [1]    : string $supercontig_name
               the name of the supercontig to obtain a slice for
  Example    : $slice = $slice_adaptor->fetch_by_supercontig_name('NT_004321');
  Description: Creates a Slice running from the smallest chr_start to the
               largest chr_end of the assembly rows that carry the given
               supercontig name in the current assembly type.  The slice
               strand is taken from the supercontig orientation column.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if the supercontig name is undefined or if no
               assembly row matches it
  Caller     : general

=cut

sub fetch_by_supercontig_name {
  my ($self, $supercontig_name) = @_;

  unless( defined $supercontig_name ) {
    $self->throw("Supercontig name argument required");
  }

  my $assembly_type = $self->db->assembly_type();

  my $sth = $self->db->prepare("
        SELECT chr.name, a.superctg_ori, MIN(a.chr_start), MAX(a.chr_end)
        FROM assembly a, chromosome chr
        WHERE superctg_name = ?
        AND type = ?
        AND chr.chromosome_id = a.chromosome_id
        GROUP by superctg_name
        ");

  $sth->execute( $supercontig_name, $assembly_type );

  my ($chr, $strand, $slice_start, $slice_end) = $sth->fetchrow_array;

  # No row means the name is unknown in this assembly.  Fail here with a
  # clear message instead of building a Slice from undefined coordinates.
  unless( defined $chr ) {
    $self->throw("Supercontig $supercontig_name is not on the golden path " .
                 "of type $assembly_type");
  }

  # The adaptor is attached like for every other slice built in this module.
  my $slice = Bio::EnsEMBL::Slice->new
    (
     -chr_name      => $chr,
     -chr_start     => $slice_start,
     -chr_end       => $slice_end,
     -strand        => $strand,
     -assembly_type => $assembly_type,
     -adaptor       => $self
    );

  return $slice;
}

Graham McVicker's avatar
Graham McVicker committed
205

Arne Stabenau's avatar
Arne Stabenau committed
206
=head2 list_overlapping_supercontigs
Graham McVicker's avatar
Graham McVicker committed
207

Arne Stabenau's avatar
Arne Stabenau committed
208 209 210 211 212 213 214 215 216
  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the Slice for which overlapping supercontigs are listed
  Example    : 
  Description: return the names of the supercontigs that overlap given Slice.  
  Returntype : listref string
  Exceptions : none
  Caller     : general

=cut
Graham McVicker's avatar
Graham McVicker committed
217 218


Arne Stabenau's avatar
Arne Stabenau committed
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
sub list_overlapping_supercontigs {
    my ($self, $slice) = @_;

    # An assembly row overlaps the slice when it ends at or after the slice
    # start and starts at or before the slice end.
    my $sth = $self->db->prepare( "
      SELECT DISTINCT superctg_name
        FROM assembly a, chromosome c
       WHERE c.chromosome_id = a.chromosome_id 
         AND c.name = ?
         AND a.type = ?
         AND a.chr_end >= ?
         AND a.chr_start <= ?
       " );

    my @bind_values = ( $slice->chr_name(), $slice->assembly_type(),
                        $slice->chr_start(), $slice->chr_end() );
    $sth->execute(@bind_values);

    # Collect the single selected column of every row.
    my @names;
    while ( my ($name) = $sth->fetchrow_array() ) {
        push @names, $name;
    }

    return \@names;
}
240 241


Graham McVicker's avatar
Graham McVicker committed
242 243
=head2 fetch_by_clone_accession

Graham McVicker's avatar
Graham McVicker committed
244 245 246 247 248 249 250 251 252 253 254
  Arg [1]    : string $clone 
               the embl accession of the clone object to retrieve
  Arg [2]    : (optional) int $size
               the size of the flanking regions to obtain around the clone 
  Example    : $slc = $slc_adaptor->fetch_by_clone_accession('AC000012',1000);
  Description: Creates a Slice around the specified clone.  If a context size 
               is given, the Slice is extended by that number of basepairs on 
               either side of the clone.  Throws if the clone is not golden.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if the clone is not in the assembly 
  Caller     : general
255 256 257

=cut

Graham McVicker's avatar
Graham McVicker committed
258
sub fetch_by_clone_accession {
    my ($self, $clone, $size) = @_;

    unless( defined $clone ) {
        $self->throw("Must have clone to fetch Slice of clone");
    }

    # A missing context size means no flanking sequence.
    $size = 0 unless defined $size;

    my $type = $self->db->assembly_type()
        or $self->throw("No assembly type defined");

    # The clone name and assembly type are bound through placeholders
    # instead of being interpolated, so quote characters in either value
    # cannot change the statement.
    my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
                        chr.name 
                    FROM    assembly a, 
                        contig c, 
                        clone  cl,
                        chromosome chr
                    WHERE c.clone_id = cl.clone_id
                    AND cl.name = ?
                    AND c.contig_id = a.contig_id 
                    AND a.type = ?
                    AND chr.chromosome_id = a.chromosome_id
                    ORDER BY a.chr_start"
                    );
    $sth->execute($clone, $type);

    # Rows arrive ordered by chr_start, so the first row carries the
    # smallest start.  The end is tracked as the largest chr_end seen so the
    # slice covers every contig of the clone even if the last row is not the
    # one that reaches furthest.
    my ($contig, $chr_name, $first_start, $last_end);
    while ( my ($name, $start, $end, $chr) = $sth->fetchrow_array ) {
        $contig      = $name;
        $chr_name    = $chr;
        $first_start = $start unless defined $first_start;
        $last_end    = $end if !defined $last_end || $end > $last_end;
    }

    unless( defined $contig ) {
        $self->throw("Clone is not on the golden path. Cannot build Slice");
    }

    # Widen by the context size; only the start is clamped (to base 1).
    $first_start -= $size;
    $last_end    += $size;
    $first_start = 1 if $first_start < 1;

    return $self->fetch_by_chr_start_end($chr_name, $first_start, $last_end);
}



Graham McVicker's avatar
Graham McVicker committed
312
=head2 fetch_by_transcript_stable_id
313

Graham McVicker's avatar
Graham McVicker committed
314 315 316 317 318 319 320 321 322 323 324 325 326 327
  Arg [1]    : string $transcriptid
               The stable id of the transcript around which the slice is 
               desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass 
               on either side of the transcript (0 by default)
  Example    : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930',10);
  Description: Creates a slice around the region of the specified transcript. 
               If a context size is given, the slice is extended by that 
               number of basepairs on either side of the 
               transcript.  Throws if the transcript is not golden.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
328 329 330

=cut

Graham McVicker's avatar
Graham McVicker committed
331
sub fetch_by_transcript_stable_id {
  my ($self, $transcriptid, $size) = @_;

  unless( defined $transcriptid ) {
    $self->throw("Must have transcript stable id to fetch Slice of transcript");
  }

  # Resolve the stable id to the internal dbID, then let
  # fetch_by_transcript_id do the actual work (including the $size default).
  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_stable_id($transcriptid);

  # Without this guard an unknown stable id would surface as Perl's generic
  # "Can't call method on an undefined value" error on the dbID call.
  unless( defined $transcript_obj ) {
    $self->throw("Transcript $transcriptid not found. Cannot build Slice.");
  }

  my $dbID = $transcript_obj->dbID;

  return $self->fetch_by_transcript_id($dbID, $size);
}

342

Graham McVicker's avatar
Graham McVicker committed
343 344


Graham McVicker's avatar
Graham McVicker committed
345 346
=head2 fetch_by_transcript_id

Graham McVicker's avatar
Graham McVicker committed
347 348 349 350 351 352 353 354 355 356 357 358 359 360
  Arg [1]    : int $transcriptid
               The unique database identifier of the transcript around which 
               the slice is desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass 
               on either side of the transcript (0 by default)
  Example    : $slc = $sa->fetch_by_transcript_id(24, 1000);
  Description: Creates a slice around the region of the specified transcript. 
               If a context size is given, the slice is extended by that 
               number of basepairs on either side of the 
               transcript. 
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown on incorrect args
  Caller     : general
361 362 363

=cut

Graham McVicker's avatar
Graham McVicker committed
364
sub fetch_by_transcript_id {
  my ($self, $transcriptid, $size) = @_;

  unless( defined $transcriptid ) {
    $self->throw("Must have transcriptid id to fetch Slice of transcript");
  }

  # A missing context size means no flanking sequence.
  $size = 0 unless(defined $size);

  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_dbID($transcriptid);

  # Fail with a clear message rather than with a method call on undef below.
  unless( defined $transcript_obj ) {
    $self->throw("Transcript $transcriptid not found. Cannot build Slice.");
  }

  # Each exon is transformed onto its own fresh empty slice; the results are
  # collected keyed by the original exon (stringified reference) and handed
  # to the transcript's transform afterwards.
  my %exon_transforms;
  my $emptyslice;
  for my $exon ( @{$transcript_obj->get_all_Exons()} ) {
    $emptyslice = Bio::EnsEMBL::Slice->new( '-empty'   => 1,
                                            '-adaptor' => $self,
                                            '-ASSEMBLY_TYPE' =>
                                            $self->db->assembly_type);
    my $newExon = $exon->transform( $emptyslice );
    $exon_transforms{ $exon } = $newExon;
  }

  # The chromosome name is read from the slice of the last exon further
  # down, so a transcript without exons cannot be placed.
  unless( defined $emptyslice ) {
    $self->throw("Transcript $transcriptid has no exons. Cannot build Slice.");
  }

  $transcript_obj->transform( \%exon_transforms );

  # Widen by the context size; only the start is clamped (to base 1).
  my $start = $transcript_obj->start() - $size;
  my $end   = $transcript_obj->end() + $size;
  $start = 1 if $start < 1;

  # NOTE(review): presumably Exon::transform fills in chr_name on the empty
  # slice it is given - confirm against the Exon implementation.
  my $slice = $self->fetch_by_chr_start_end($emptyslice->chr_name,
                                            $start, $end);
  return $slice;
}

402 403


Graham McVicker's avatar
Graham McVicker committed
404
=head2 fetch_by_gene_stable_id
405

Graham McVicker's avatar
Graham McVicker committed
406 407 408 409 410 411 412 413 414 415 416 417 418
  Arg [1]    : string $geneid
               The stable id of the gene around which the slice is 
               desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass 
               on either side of the gene (0 by default)
  Example    : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123',10);
  Description: Creates a slice around the region of the specified gene. 
               If a context size is given, the slice is extended by that 
               number of basepairs on either side of the gene. 
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $geneid is undefined or if the gene is not on the
               golden path of the current assembly type
  Caller     : general
419 420 421

=cut

Graham McVicker's avatar
Graham McVicker committed
422
sub fetch_by_gene_stable_id {
    my ($self, $geneid, $size) = @_;

    $self->throw("Must have gene id to fetch Slice of gene")
        unless defined $geneid;

    # A missing context size means no flanking sequence.
    $size = 0 unless defined $size;

    my ($chr_name, $gene_start, $gene_end) =
        $self->_get_chr_start_end_of_gene($geneid);

    # An undefined start means the lookup found no exon placed on the
    # current assembly; the assembly type is fetched only to name it in the
    # error message.
    unless ( defined $gene_start ) {
        my $type = $self->db->assembly_type()
            or $self->throw("No assembly type defined");
        $self->throw("Gene is not on the golden path '$type'. Cannot build Slice.");
    }

    # Widen by the context size; only the start is clamped (to base 1).
    my $slice_start = $gene_start - $size;
    my $slice_end   = $gene_end + $size;
    $slice_start = 1 if $slice_start < 1;

    return $self->fetch_by_chr_start_end($chr_name, $slice_start, $slice_end);
}


450

Graham McVicker's avatar
Graham McVicker committed
451
=head2 fetch_by_chr_name
Graham McVicker's avatar
Graham McVicker committed
452

Graham McVicker's avatar
Graham McVicker committed
453 454 455 456 457 458
  Arg [1]    : string $chr_name
  Example    : $slice = $slice_adaptor->fetch_by_chr_name('20'); 
  Description: Retrieves a slice on the region of an entire chromosome
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $chr_name arg is not supplied
  Caller     : general
Graham McVicker's avatar
Graham McVicker committed
459 460 461 462 463 464 465 466 467 468 469 470 471 472

=cut

sub fetch_by_chr_name {
    my ($self, $chr_name) = @_;

    $self->throw("Chromosome name argument required") unless $chr_name;

    # The slice covers the whole chromosome: it starts at the first base and
    # ends at the length reported by the chromosome object.
    my $chr_start  = 1;
    my $chromosome =
        $self->db()->get_ChromosomeAdaptor()->fetch_by_chr_name($chr_name);
    my $chr_end    = $chromosome->length();

    my $type = $self->db->assembly_type();

    my $slice = Bio::EnsEMBL::Slice->new(
        -chr_name      => $chr_name,
        -chr_start     => $chr_start,
        -chr_end       => $chr_end,
        -assembly_type => $type,
        -adaptor       => $self
    );

    return $slice;
}

Graham McVicker's avatar
Graham McVicker committed
490 491


492 493 494 495 496 497 498 499 500 501 502 503
=head2 fetch_by_mapfrag

 Title   : fetch_by_mapfrag
 Usage   : $slice = $slice_adaptor->fetch_by_mapfrag('20');
 Function: Creates a slice of a "mapfrag"
 Returns : Slice object
 Args    : mapfrag synonym; optional mode flag ('fixed-width', the default,
           or 'context'); optional size


=cut

sub fetch_by_mapfrag {
    my ($self, $mymapfrag, $flag, $size) = @_;

    # Two modes: 'fixed-width' (the default) builds a window of $size placed
    # around the mapfrag start; any other flag value (e.g. 'context')
    # extends the mapfrag by $size on both sides.  A false $size falls back
    # to 200000 in fixed-width mode and to 0 otherwise.
    $flag = 'fixed-width' unless $flag;
    unless ($size) {
        $size = ($flag eq 'fixed-width') ? 200000 : 0;
    }

    $self->throw("Mapfrag name argument required") unless $mymapfrag;

    # Look the mapfrag up by synonym.  An unknown synonym is not an error:
    # the explicit undef is kept so list-context callers still receive a
    # one-element list.
    my $mapfrag_adaptor = $self->db()->get_MapFragAdaptor();
    my $mapfrag = $mapfrag_adaptor->fetch_by_synonym($mymapfrag);
    return undef if !defined $mapfrag;

    my ($chr_start, $chr_end);
    if ($flag ne 'fixed-width') {
        $chr_start = $mapfrag->seq_start - $size;
        $chr_end   = $mapfrag->seq_end   + $size;
    }
    else {
        # Split the window around seq_start; for odd sizes the extra base
        # goes to the right-hand side.
        my $halfsize = int( $size/2 );
        $chr_start = $mapfrag->seq_start - $halfsize;
        $chr_end   = $mapfrag->seq_start + $size - $halfsize;
    }

    my $type = $self->db->assembly_type();

    # Note: unlike the other fetch methods, the start is not clamped to 1.
    my $slice = Bio::EnsEMBL::Slice->new(
        -chr_name      => $mapfrag->seq,
        -chr_start     => $chr_start,
        -chr_end       => $chr_end,
        -assembly_type => $type,
        -adaptor       => $self
    );

    return $slice;
}



Graham McVicker's avatar
Graham McVicker committed
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557


=head2 _get_chr_start_end_of_contig

 Title   : _get_chr_start_end_of_contig
 Usage   :
 Function: returns the chromosome name, absolute start and absolute end of the 
           specified contig
 Returns : returns chr,start,end
 Args    : contig name

=cut

sub _get_chr_start_end_of_contig {
    my ($self, $contigid) = @_;

    # Despite its name, $contigid is matched against contig.name below.
    unless( defined $contigid ) {
        $self->throw("Must have contig id to fetch Slice of contig");
    }

    my $type = $self->db->assembly_type()
        or $self->throw("No assembly type defined");

    # The contig name and assembly type are bound through placeholders
    # instead of being interpolated, so quote characters in either value
    # cannot change the statement.
    my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
                        chr.name 
                    FROM assembly a, contig c, chromosome chr 
                    WHERE c.name = ?
                    AND c.contig_id = a.contig_id 
                    AND a.type = ?
                    AND chr.chromosome_id = a.chromosome_id"
                    );
    $sth->execute($contigid, $type);

    # Only the first matching assembly row is read.
    my ($contig, $start, $end, $chr_name) = $sth->fetchrow_array;

    unless( defined $contig ) {
        $self->throw("Contig $contigid is not on the golden path of type $type");
    }

    # Returned as a list: chromosome name, chromosomal start, chromosomal end.
    return ($chr_name, $start, $end);
}

=head2 _get_chr_start_end_of_gene

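 Title   : _get_chr_start_end_of_gene
 Usage   : my ($chr,$start,$end) = $self->_get_chr_start_end_of_gene($geneid);
 Function: returns the chromosome name and the smallest and largest
           chromosomal coordinate covered by the exons of the specified gene
 Returns : returns chr,start,end (all undefined if no exon is found)
 Args    : gene stable id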


=cut


sub _get_chr_start_end_of_gene {
  my ($self, $geneid) = @_;

  my $type = $self->db->assembly_type()
    or $self->throw("No assembly type defined");

  # One row per exon of every transcript of the gene.  The two computed
  # columns convert the exon's contig coordinates to chromosomal ones,
  # mirroring them when the contig is not in orientation 1.
  # The assembly type and gene stable id are bound through placeholders
  # instead of being interpolated, so quote characters in either value
  # cannot change the statement.
  my $sth = $self->db->prepare("SELECT  
   if(a.contig_ori=1,(e.contig_start-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_end)),
   if(a.contig_ori=1,(e.contig_end-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_start)),
     chr.name
  
                    FROM    exon e,
                        transcript tr,
                        exon_transcript et,
                        assembly a,
                        gene_stable_id gsi,
                        chromosome chr
                    WHERE e.exon_id=et.exon_id 
                    AND et.transcript_id =tr.transcript_id 
                    AND a.contig_id=e.contig_id 
                    AND a.type = ? 
                    AND tr.gene_id = gsi.gene_id
                    AND gsi.stable_id = ?
                    AND a.chromosome_id = chr.chromosome_id" 
                    );
  $sth->execute($type, $geneid);

  # Track the overall minimum and maximum coordinate in a single pass.
  # The chromosome name of the last row read is the one returned.
  my ($chr, $gene_start, $gene_end);
  while ( my ($exon_start, $exon_end, $chr_name) = $sth->fetchrow_array ) {
    $chr = $chr_name;
    for my $pos ($exon_start, $exon_end) {
      $gene_start = $pos if !defined $gene_start || $pos < $gene_start;
      $gene_end   = $pos if !defined $gene_end   || $pos > $gene_end;
    }
  }

  # All three values stay undefined when the query returned no rows; the
  # caller uses an undefined start to detect a gene that is not placed.
  return ($chr, $gene_start, $gene_end);
}
Graham McVicker's avatar
Graham McVicker committed
642

643
1;
Graham McVicker's avatar
Graham McVicker committed
644 645 646 647 648 649