#
# Ensembl module for Bio::EnsEMBL::DBSQL::SliceAdaptor
#
# Cared for by Ewan Birney <ensembl-dev@ebi.ac.uk>
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::EnsEMBL::DBSQL::SliceAdaptor - Adaptors for slices

=head1 SYNOPSIS
  



=head1 DESCRIPTION

Factory for getting out slices of assemblies. WebSlice is the highly
accelerated version for the web site.

=head1 AUTHOR - Ewan Birney

This module is part of the Ensembl project http://www.ensembl.org

Email ensembl-dev@ebi.ac.uk

Describe contact details here

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...


46
package Bio::EnsEMBL::DBSQL::SliceAdaptor;
use vars qw(@ISA);
use strict;

# Object preamble - inherits from Bio::EnsEMBL::DBSQL::BaseAdaptor
# (see the @ISA assignment below).
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::DBSQL::DBAdaptor;

@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
58 59


Graham McVicker's avatar
Graham McVicker committed
60
# new is inherited from BaseAdaptor
61

62

63

64
=head2 fetch_by_chr_start_end

  Arg [1]    : string $chr
               the name of the chromosome to obtain a slice for
  Arg [2]    : int $start
               the start basepair of the slice to obtain in chromosomal
               coordinates
  Arg [3]    : int $end
               the end basepair of the slice to obtain in chromosomal
               coordinates
  Example    : $slice = $slice_adaptor->fetch_by_chr_start_end('20', 1, 100000);
  Description: Creates a slice object on the given chromosome and coordinates,
               using the assembly type of the attached database adaptor.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $chr is false, if $start or $end is undefined,
               or if $start is greater than $end
  Caller     : general

=cut

sub fetch_by_chr_start_end {
    my ($self, $chr, $start, $end) = @_;

    unless($chr) {
      $self->throw("chromosome name argument must be defined and not ''");
    }

    # Only definedness is checked for the coordinates, so a value of 0 is
    # accepted here.
    unless(defined $end) {
      $self->throw("end argument must be defined\n");
    }

    unless(defined $start) {
      $self->throw("start argument must be defined\n");
    }

    # A single-basepair slice ($start == $end) is allowed.
    if($start > $end) {
      $self->throw("start must be less than end: parameters $chr:$start:$end");
    }

    my $type = $self->db->assembly_type();

    return Bio::EnsEMBL::Slice->new(
          -chr_name      => $chr,
          -chr_start     => $start,
          -chr_end       => $end,
          -assembly_type => $type,
          -adaptor       => $self
         );
}



117
=head2 fetch_by_contig_name

  Arg [1]    : string $name
               the name of the contig to obtain a slice for
  Arg [2]    : (optional) int $size
               the size of the flanking regions to obtain (aka context size)
  Example    : $slc = $slc_adaptor->fetch_by_contig_name('AB000878.1.1.33983');
  Description: Creates a slice object around the specified contig.
               If a context size is given, the slice is extended by that
               number of basepairs on either side of the contig.  The start
               of the slice is never less than 1.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown (by _get_chr_start_end_of_contig) if the contig name is
               undefined or the contig is not in the current assembly
  Caller     : general

=cut

sub fetch_by_contig_name {
   my ($self, $name, $size) = @_;

   $size = 0 unless defined $size;

   my ($chr_name, $start, $end) = $self->_get_chr_start_end_of_contig($name);

   # Widen by the context size, but do not run off the start of the chromosome.
   $start -= $size;
   $end   += $size;
   $start  = 1 if $start < 1;

   return $self->fetch_by_chr_start_end($chr_name, $start, $end);
}



Graham McVicker's avatar
Graham McVicker committed
152
=head2 fetch_by_fpc_name

  Arg [1]    : string $fpc_name
  Example    : my $slice = $slice_adaptor->fetch_by_fpc_name('NT_004321');
  Description: Creates a Slice on the region of the assembly where
               the specified FPC (super) contig lies.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $fpc_name is undefined or if no assembly rows are
               found for it in the current assembly type
  Caller     : general

=cut

sub fetch_by_fpc_name {
    my ($self, $fpc_name) = @_;

    unless(defined $fpc_name) {
      $self->throw("FPC contig name argument required");
    }

    my $type = $self->db->assembly_type();

    # Values are bound through placeholders rather than interpolated into
    # the SQL text, so quotes in the name cannot alter the statement.
    my $sth = $self->db->prepare("
        SELECT chr.name, a.superctg_ori, MIN(a.chr_start), MAX(a.chr_end)
        FROM assembly a, chromosome chr
        WHERE superctg_name = ?
        AND type = ?
        AND chr.chromosome_id = a.chromosome_id
        GROUP by superctg_name
        ");

    $sth->execute($fpc_name, $type);

    my ($chr, $strand, $slice_start, $slice_end) = $sth->fetchrow_array;

    # Without this check a Slice with undefined name and coordinates
    # would be handed back to the caller.
    unless(defined $chr) {
      $self->throw("FPC contig $fpc_name is not on the golden path of type " .
                   "$type. Cannot build Slice");
    }

    # NOTE(review): unlike the other fetch methods no -adaptor is passed to
    # the Slice here; confirm whether that is intentional.
    return Bio::EnsEMBL::Slice->new
      (
       -chr_name      => $chr,
       -chr_start     => $slice_start,
       -chr_end       => $slice_end,
       -strand        => $strand,
       -assembly_type => $type
      );
}



Graham McVicker's avatar
Graham McVicker committed
198 199
=head2 fetch_by_clone_accession

  Arg [1]    : string $clone
               the embl accession of the clone object to retrieve
  Arg [2]    : (optional) int $size
               the size of the flanking regions to obtain around the clone
  Example    : $slc = $slc_adaptor->fetch_by_clone_accession('AC000012',1000);
  Description: Creates a Slice around the specified clone.  If a context size
               is given, the Slice is extended by that number of basepairs on
               either side of the clone.  The slice runs from the lowest
               chr_start to the highest chr_end of the clone's assembled
               contigs, and its start is never less than 1.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $clone is undefined, if no assembly type is defined,
               or if the clone is not in the assembly
  Caller     : general

=cut

sub fetch_by_clone_accession {
   my ($self, $clone, $size) = @_;

   if( !defined $clone ) {
     $self->throw("Must have clone to fetch Slice of clone");
   }
   $size = 0 unless defined $size;

   my $type = $self->db->assembly_type()
     or $self->throw("No assembly type defined");

   # Clone name and assembly type are bound via placeholders instead of
   # being interpolated into the statement text.
   my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
                        chr.name
                    FROM    assembly a,
                        contig c,
                        clone  cl,
                        chromosome chr
                    WHERE c.clone_id = cl.clone_id
                    AND cl.name = ?
                    AND c.contig_id = a.contig_id
                    AND a.type = ?
                    AND chr.chromosome_id = a.chromosome_id
                    ORDER BY a.chr_start"
                    );
   $sth->execute($clone, $type);

   my ($contig, $first_start, $last_end, $chr_name);
   while ( my @row = $sth->fetchrow_array ) {
       my ($start, $end);
       ($contig, $start, $end, $chr_name) = @row;

       # Rows arrive ordered by chr_start, so the first row has the lowest
       # start.  The end is tracked as a maximum because the row with the
       # highest start need not have the highest end.
       $first_start = $start unless defined $first_start;
       $last_end    = $end   if !defined $last_end || $end > $last_end;
   }

   if( !defined $contig ) {
       $self->throw("Clone is not on the golden path. Cannot build Slice");
   }

   $first_start -= $size;
   $last_end    += $size;
   $first_start  = 1 if $first_start < 1;

   return $self->fetch_by_chr_start_end($chr_name, $first_start, $last_end);
}



Graham McVicker's avatar
Graham McVicker committed
268
=head2 fetch_by_transcript_stable_id

  Arg [1]    : string $transcriptid
               The stable id of the transcript around which the slice is
               desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass
               on either side of the transcript (0 by default)
  Example    : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930',10);
  Description: Creates a slice around the region of the specified transcript.
               If a context size is given, the slice is extended by that
               number of basepairs on either side of the transcript.
               The stable id is resolved to a database id and the work is
               delegated to fetch_by_transcript_id.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $transcriptid is undefined or if no transcript
               could be fetched for it
  Caller     : general

=cut

sub fetch_by_transcript_stable_id {
  my ($self, $transcriptid, $size) = @_;

  unless( defined $transcriptid ) {
    $self->throw("Must have transcript stable id to fetch Slice of transcript");
  }

  # Just get the dbID, then fetch slice by that
  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_stable_id($transcriptid);

  # Guard against calling ->dbID on an undefined value.
  unless( defined $transcript_obj ) {
    $self->throw("Transcript '$transcriptid' could not be fetched. " .
                 "Cannot build Slice");
  }

  return $self->fetch_by_transcript_id($transcript_obj->dbID, $size);
}

298

Graham McVicker's avatar
Graham McVicker committed
299 300


Graham McVicker's avatar
Graham McVicker committed
301 302
=head2 fetch_by_transcript_id

  Arg [1]    : int $transcriptid
               The unique database identifier of the transcript around which
               the slice is desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass
               on either side of the transcript (0 by default)
  Example    : $slc = $sa->fetch_by_transcript_id(24, 1000);
  Description: Creates a slice around the region of the specified transcript.
               If a context size is given, the slice is extended by that
               number of basepairs on either side of the
               transcript.  The start of the slice is never less than 1.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $transcriptid is undefined, if the transcript
               could not be fetched, or if it has no exons
  Caller     : general

=cut

sub fetch_by_transcript_id {
  my ($self, $transcriptid, $size) = @_;

  unless( defined $transcriptid ) {
    $self->throw("Must have transcriptid id to fetch Slice of transcript");
  }

  $size = 0 unless(defined $size);

  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_dbID($transcriptid);

  unless( defined $transcript_obj ) {
    $self->throw("Transcript with dbID $transcriptid could not be fetched. " .
                 "Cannot build Slice");
  }

  my $assembly_type = $self->db->assembly_type;

  # Each exon is transformed onto its own fresh empty slice; the results are
  # keyed by the original exon (stringified reference) and handed to the
  # transcript's transform().
  # NOTE(review): the chromosome name is read from the last empty slice
  # afterwards, so Exon::transform presumably fills it in - confirm.  A new
  # slice per exon is kept deliberately for that reason.
  my %exon_transforms;
  my $emptyslice;
  for my $exon ( @{$transcript_obj->get_all_Exons()} ) {
    $emptyslice = Bio::EnsEMBL::Slice->new( '-empty'         => 1,
                                            '-adaptor'       => $self,
                                            '-ASSEMBLY_TYPE' => $assembly_type );
    $exon_transforms{ $exon } = $exon->transform( $emptyslice );
  }

  # With no exons there is no slice to read the chromosome name from.
  unless( defined $emptyslice ) {
    $self->throw("Transcript $transcriptid has no exons. Cannot build Slice");
  }

  $transcript_obj->transform( \%exon_transforms );

  my $start = $transcript_obj->start() - $size;
  my $end   = $transcript_obj->end()   + $size;
  $start = 1 if $start < 1;

  return $self->fetch_by_chr_start_end($emptyslice->chr_name, $start, $end);
}

358 359


Graham McVicker's avatar
Graham McVicker committed
360
=head2 fetch_by_gene_stable_id

  Arg [1]    : string $geneid
               The stable id of the gene around which the slice is
               desired
  Arg [2]    : (optional) int $size
               The length of the flanking regions the slice should encompass
               on either side of the gene (0 by default)
  Example    : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123',10);
  Description: Creates a slice around the region of the specified gene.
               If a context size is given, the slice is extended by that
               number of basepairs on either side of the gene.  The start
               of the slice is never less than 1.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $geneid is undefined, if no assembly type is
               defined, or if the gene is not in the current assembly
  Caller     : general

=cut

sub fetch_by_gene_stable_id {
   my ($self, $geneid, $size) = @_;

   if( !defined $geneid ) {
       $self->throw("Must have gene id to fetch Slice of gene");
   }
   $size = 0 unless defined $size;

   my ($chr_name, $start, $end) = $self->_get_chr_start_end_of_gene($geneid);

   # An undefined start means the lookup found no exon on the assembly.
   if( !defined $start ) {
     my $type = $self->db->assembly_type()
       or $self->throw("No assembly type defined");
     $self->throw("Gene is not on the golden path '$type'. " .
                  "Cannot build Slice.");
   }

   $start -= $size;
   $end   += $size;
   $start  = 1 if $start < 1;

   return $self->fetch_by_chr_start_end($chr_name, $start, $end);
}


406

Graham McVicker's avatar
Graham McVicker committed
407
=head2 fetch_by_chr_name

  Arg [1]    : string $chr_name
  Example    : $slice = $slice_adaptor->fetch_by_chr_name('20');
  Description: Retrieves a slice on the region of an entire chromosome,
               from basepair 1 to the chromosome's length.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : thrown if $chr_name arg is not supplied or if no chromosome
               object could be fetched for it
  Caller     : general

=cut

sub fetch_by_chr_name {
   my ($self, $chr_name) = @_;

   unless( $chr_name ) {
       $self->throw("Chromosome name argument required");
   }

   # set the end of the slice to the end of the chromosome
   my $ca = $self->db()->get_ChromosomeAdaptor();
   my $chromosome = $ca->fetch_by_chr_name($chr_name);

   # Guard against calling ->length on an undefined value.
   unless( defined $chromosome ) {
       $self->throw("Chromosome $chr_name could not be fetched. " .
                    "Cannot build Slice");
   }

   my $chr_start = 1;
   my $chr_end   = $chromosome->length();
   my $type      = $self->db->assembly_type();

   return Bio::EnsEMBL::Slice->new
     (
      -chr_name      => $chr_name,
      -chr_start     => $chr_start,
      -chr_end       => $chr_end,
      -assembly_type => $type,
      -adaptor       => $self
     );
}

Graham McVicker's avatar
Graham McVicker committed
446 447


448 449 450 451 452 453 454 455 456 457 458 459
=head2 fetch_by_mapfrag

  Arg [1]    : string $mymapfrag
               a synonym of the mapfrag to build the slice around
  Arg [2]    : (optional) string $flag
               'fixed-width' (default) or 'context'
  Arg [3]    : (optional) int $size
               for 'fixed-width': the total width of the slice, placed
               around the start of the mapfrag (default 200000);
               for 'context': the number of flanking basepairs added on
               either side of the mapfrag (default 0).
               A false value (including 0) selects the default.
  Example    : $slice = $slice_adaptor->fetch_by_mapfrag($synonym);
               $slice = $slice_adaptor->fetch_by_mapfrag($synonym,
                                                         'context', 1000);
  Description: Creates a slice around a "mapfrag".
  Returntype : Bio::EnsEMBL::Slice, or undef if no mapfrag is found for
               the given synonym
  Exceptions : thrown if $mymapfrag is not supplied
  Caller     : general

=cut

sub fetch_by_mapfrag {
   my ($self, $mymapfrag, $flag, $size) = @_;

   unless( $mymapfrag ) {
       $self->throw("Mapfrag name argument required");
   }

   $flag ||= 'fixed-width'; # alt.. 'context'
   $size ||= $flag eq 'fixed-width' ? 200000 : 0;

   my $mfa     = $self->db()->get_MapFragAdaptor();
   my $mapfrag = $mfa->fetch_by_synonym($mymapfrag);

   # Explicit undef (not a bare return) is kept so that list-context
   # callers still receive exactly one value, as before.
   return undef unless defined $mapfrag;

   # NOTE(review): unlike the other fetch methods the start is not clamped
   # to 1 here, so a mapfrag near the chromosome start can give a start
   # below 1 - confirm whether that is intended.
   my ($chr_start, $chr_end);
   if( $flag eq 'fixed-width' ) {
       # Window of $size basepairs positioned around the mapfrag start.
       my $halfsize = int( $size/2 );
       $chr_start = $mapfrag->seq_start - $halfsize;
       $chr_end   = $mapfrag->seq_start + $size - $halfsize;
   } else {
       # Whole mapfrag plus $size basepairs of context on either side.
       $chr_start = $mapfrag->seq_start - $size;
       $chr_end   = $mapfrag->seq_end   + $size;
   }

   my $type = $self->db->assembly_type();

   return Bio::EnsEMBL::Slice->new
     (
      -chr_name      => $mapfrag->seq,
      -chr_start     => $chr_start,
      -chr_end       => $chr_end,
      -assembly_type => $type,
      -adaptor       => $self
     );
}



Graham McVicker's avatar
Graham McVicker committed
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513


=head2 _get_chr_start_end_of_contig

  Arg [1]    : string $contigid
               the name of the contig (matched against contig.name)
  Example    : my ($chr, $start, $end) =
                 $self->_get_chr_start_end_of_contig('AB000878.1.1.33983');
  Description: Returns the chromosome name, absolute start and absolute end
               of the specified contig in the current assembly type.
  Returntype : list (string $chr_name, int $start, int $end)
  Exceptions : thrown if $contigid is undefined, if no assembly type is
               defined, or if the contig is not in the current assembly
  Caller     : internal

=cut

sub _get_chr_start_end_of_contig {
   my ($self, $contigid) = @_;

   if( !defined $contigid ) {
       $self->throw("Must have contig id to fetch Slice of contig");
   }

   my $type = $self->db->assembly_type()
     or $self->throw("No assembly type defined");

   # Contig name and assembly type are bound via placeholders instead of
   # being interpolated into the statement text.
   my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
                        chr.name
                    FROM assembly a, contig c, chromosome chr
                    WHERE c.name = ?
                    AND c.contig_id = a.contig_id
                    AND a.type = ?
                    AND chr.chromosome_id = a.chromosome_id"
                    );
   $sth->execute($contigid, $type);

   # Only the first matching row is used.
   my ($contig, $start, $end, $chr_name) = $sth->fetchrow_array;

   if( !defined $contig ) {
     $self->throw("Contig $contigid is not on the golden path of type $type");
   }

   return ($chr_name, $start, $end);
}

=head2 _get_chr_start_end_of_gene

  Arg [1]    : string $geneid
               the stable id of the gene (matched against
               gene_stable_id.stable_id)
  Example    : my ($chr, $start, $end) =
                 $self->_get_chr_start_end_of_gene('ENSG00000012123');
  Description: Returns the chromosome name and the lowest and highest
               chromosomal coordinate covered by any exon of the gene in
               the current assembly type.  All three values are undefined
               when no exon of the gene is found on the assembly.
  Returntype : list (string $chr_name, int $start, int $end)
  Exceptions : thrown if no assembly type is defined
  Caller     : internal

=cut

sub _get_chr_start_end_of_gene {
  my ($self, $geneid) = @_;

  my $type = $self->db->assembly_type()
    or $self->throw("No assembly type defined");

  # The two computed columns are the exon start and end in chromosomal
  # coordinates, calculated differently depending on the contig orientation.
  # Assembly type and stable id are bound via placeholders (in that order)
  # instead of being interpolated into the statement text.
  my $sth = $self->db->prepare("SELECT
   if(a.contig_ori=1,(e.contig_start-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_end)),
   if(a.contig_ori=1,(e.contig_end-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_start)),
     chr.name

                    FROM    exon e,
                        transcript tr,
                        exon_transcript et,
                        assembly a,
                        gene_stable_id gsi,
                        chromosome chr
                    WHERE e.exon_id=et.exon_id
                    AND et.transcript_id =tr.transcript_id
                    AND a.contig_id=e.contig_id
                    AND a.type = ?
                    AND tr.gene_id = gsi.gene_id
                    AND gsi.stable_id = ?
                    AND a.chromosome_id = chr.chromosome_id"
                    );
  $sth->execute($type, $geneid);

  # Track the overall minimum and maximum over every exon start and end;
  # the chromosome name is taken from the last row read.
  my ($chr, $min, $max);
  while ( my ($exon_start, $exon_end, $row_chr) = $sth->fetchrow_array ) {
    $chr = $row_chr;
    for my $pos ($exon_start, $exon_end) {
      $min = $pos if !defined $min || $pos < $min;
      $max = $pos if !defined $max || $pos > $max;
    }
  }

  return ($chr, $min, $max);
}
Graham McVicker's avatar
Graham McVicker committed
598 599 600 601 602 603 604