SliceAdaptor.pm 13.1 KB
Newer Older
1 2

#
Simon Potter's avatar
pod  
Simon Potter committed
3
# Ensembl module for Bio::EnsEMBL::DBSQL::SliceAdaptor
4 5 6 7 8 9 10 11 12 13 14
#
# Cared for by Ewan Birney <ensembl-dev@ebi.ac.uk>
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

15
Bio::EnsEMBL::DBSQL::SliceAdaptor - Adaptors for slices
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45

=head1 SYNOPSIS
  



=head1 DESCRIPTION

Factory for getting out slices of assemblies. WebSlice is the highly
accelerated version for the web site.

=head1 AUTHOR - Ewan Birney

This modules is part of the Ensembl project http://www.ensembl.org

Email ensembl-dev@ebi.ac.uk

Describe contact details here

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...


46
package Bio::EnsEMBL::DBSQL::SliceAdaptor;
47 48 49
use vars qw(@ISA);
use strict;

50 51

# Object preamble - inherits from Bio::EnsEMBL::Root
52 53
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Slice;
54
use Bio::EnsEMBL::DBSQL::DBAdaptor;
55

56

57
@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
58 59


Graham McVicker's avatar
Graham McVicker committed
60
# new is inherited from BaseAdaptor
61

62

63

64
=head2 fetch_by_chr_start_end
65

66 67 68 69 70 71 72 73 74 75 76 77 78
  Arg [1]    : string $chr
               the name of the chromosome to obtain a slice for
  Arg [2]    : int $start
               the start basepair of the slice to obtain in chromosomal 
               coordinates
  Arg [3]    : int $end 
               the end basepair of the slice to obtain in chromosomal 
               coordinates
  Example    : $slice = $slice_adaptor->fetch_by_chr_start_end();
  Description: Creates a slice object on the given chromosome and coordinates.
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
79 80 81

=cut

Graham McVicker's avatar
Graham McVicker committed
82
sub fetch_by_chr_start_end {
83 84
    my ($self,$chr,$start,$end) = @_;

85 86
    unless($chr) {
      $self->throw("chromosome name argument must be defined and not ''");
87 88
    }

89 90 91 92 93 94 95 96 97
    unless(defined $end) {   # Why defined?  Is '0' a valid end?
      $self->throw("end argument must be defined\n");
    }

    unless(defined $start) {
      $self->throw("start argument must be defined\n");
    }

    if($start > $end) {
98
      $self->throw("start must be less than end: parameters $chr:$start:$end");
99
    }
100
    
101
    my $slice;
102
    my $type = $self->db->assembly_type();
103

Graham McVicker's avatar
Graham McVicker committed
104
    $slice = Bio::EnsEMBL::Slice->new(
105 106 107 108
          -chr_name      => $chr,
          -chr_start     => $start,
          -chr_end       => $end,
          -assembly_type => $type,
109
          -adaptor       => $self
Graham McVicker's avatar
Graham McVicker committed
110
	 );
111 112 113 114 115 116

    return $slice;
}



117
=head2 fetch_by_contig_name
118

119
 Title   : fetch_by_contig_name
Graham McVicker's avatar
Graham McVicker committed
120 121 122 123 124
 Usage   : $slice = $slice_adptr->fetch_by_contig_name('AC000012.00001',1000);
 Function: Creates a slice around the the specified contig.  If a context 
           size is given, the slice is extended by that number of basepairs 
           on either side of the contig.
 Returns : Bio::EnsEMBL::Slice object 
125 126 127 128 129
 Args    : contig id, [context size in bp]


=cut

130
sub fetch_by_contig_name {
131 132 133 134
   my ($self,$contigid,$size) = @_;

   if( !defined $size ) {$size=0;}

135
   my ($chr_name,$start,$end) = $self->_get_chr_start_end_of_contig($contigid);
136

137 138 139 140 141 142 143 144
   $start -= $size;
   $end += $size;

   if($start < 1) {
     $start  = 1;
   }

   return $self->fetch_by_chr_start_end($chr_name, $start, $end);
Graham McVicker's avatar
Graham McVicker committed
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
 }


=head2 fetch_by_fpc_name

 Title   : fetch_by_fpc_name
 Usage   :
 Function: create a Slice representing a complete FPC contig
 Example :
 Returns : 
 Args    : the FPC contig id.


=cut

sub fetch_by_fpc_name {
    my ($self,$fpc_name) = @_;

    my $type = $self->db->assembly_type();

    my $sth = $self->db->prepare("
166 167
        SELECT chr.name, a.superctg_ori, MIN(a.chr_start), MAX(a.chr_end)
        FROM assembly a, chromosome chr
Graham McVicker's avatar
Graham McVicker committed
168 169
        WHERE superctg_name = '$fpc_name'
        AND type = '$type'
170
        AND chr.chromosome_id = a.chromosome_id
Graham McVicker's avatar
Graham McVicker committed
171 172 173 174 175 176 177 178 179
        GROUP by superctg_name
        ");

    $sth->execute;

    my ($chr, $strand, $slice_start, $slice_end) = $sth->fetchrow_array;

    my $slice;

180 181 182 183 184 185 186 187
    $slice = new Bio::EnsEMBL::Slice
      (
       -chr_name => $chr,
       -chr_start =>$slice_start,
       -chr_end => $slice_end,
       -strand => $strand,
       -assembly_type => $type
      );
Graham McVicker's avatar
Graham McVicker committed
188 189

    return $slice;
190 191 192 193
}



Graham McVicker's avatar
Graham McVicker committed
194 195 196 197
=head2 fetch_by_clone_accession

 Title   : fetch_by_clone_accession
 Usage   : $slice = $slice_adaptor->fetch_by_clone_accession('AC000012',1000);
Graham McVicker's avatar
Graham McVicker committed
198 199 200
 Function: Creates a Slice around the specified clone.  If a context size is 
           given, the Slice is extended by that number of basepairs on either 
           side of the clone.  Throws if the clone is not golden.
201 202 203 204 205
 Returns : Slice object 
 Args    : clone id, [context size in bp]

=cut

Graham McVicker's avatar
Graham McVicker committed
206
sub fetch_by_clone_accession{
207 208 209
   my ($self,$clone,$size) = @_;

   if( !defined $clone ) {
Graham McVicker's avatar
Graham McVicker committed
210
     $self->throw("Must have clone to fetch Slice of clone");
211 212 213
   }
   if( !defined $size ) {$size=0;}

214
   my $type = $self->db->assembly_type()
215 216 217 218 219
    or $self->throw("No assembly type defined");

   my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
220
                        chr.name 
221 222
                    FROM    assembly a, 
                        contig c, 
223 224
                        clone  cl,
                        chromosome chr
225 226 227
                    WHERE c.clone_id = cl.clone_id
                    AND cl.name = '$clone'  
                    AND c.contig_id = a.contig_id 
228 229
                    AND a.type = '$type'
                    AND chr.chromosome_id = a.chromosome_id
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
                    ORDER BY a.chr_start"
                    );
   $sth->execute();
 
   my ($contig,$start,$end,$chr_name); 
   my $counter; 
   my $first_start;
   while ( my @row=$sth->fetchrow_array){
       $counter++;
       ($contig,$start,$end,$chr_name)=@row;
       if ($counter==1){$first_start=$start;}      
   }

   if( !defined $contig ) {
       $self->throw("Clone is not on the golden path. Cannot build Slice");
   }
     
247 248 249 250 251 252 253 254
   $first_start -= $size;
   $end += $size;

   if($first_start < 1) {
     $first_start = 1;
   }

   my $slice = $self->fetch_by_chr_start_end($chr_name, $first_start, $end);
255 256 257 258 259
   return $slice;
}



Graham McVicker's avatar
Graham McVicker committed
260
=head2 fetch_by_transcript_stable_id
261

Graham McVicker's avatar
Graham McVicker committed
262 263
 Title   : fetch_by_transcript_stable_id
 Usage   : $slice = $slice_adaptor->fetch_by_transcript_stable_id(
264 265 266 267 268 269 270 271 272 273 274
                                       'ENST00000302930',1000);
 Function: Creates a slice of the specified object.  If a context
           size is given, the slice is extended by that number of
	   basepairs on either side of the transcript.  Throws if
	   the transcript is not golden.
 Returns : Slice object 
 Args    : transcript stable ID, [context size in bp]


=cut

Graham McVicker's avatar
Graham McVicker committed
275
sub fetch_by_transcript_stable_id{
276 277 278 279 280 281
  my ($self,$transcriptid,$size) = @_;

  # Just get the dbID, then fetch slice by that
  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_stable_id($transcriptid);
  my $dbID = $transcript_obj->dbID;
Graham McVicker's avatar
Graham McVicker committed
282 283
  
  return $self->fetch_by_transcript_id($dbID, $size);
284 285
}

286

Graham McVicker's avatar
Graham McVicker committed
287 288 289 290
=head2 fetch_by_transcript_id

 Title   : fetch_by_transcript_id
 Usage   : $slice = $slice_adaptor->fetch_by_transcript_id(24,1000);
291 292 293 294 295 296 297 298 299
 Function: Creates a slice of the specified object.  If a context
           size is given, the slice is extended by that number of
	   basepairs on either side of the transcript.  Throws if
	   the transcript is not golden.
 Returns : Slice object 
 Args    : transcript dbID, [context size in bp]

=cut

Graham McVicker's avatar
Graham McVicker committed
300
sub fetch_by_transcript_id {
301
  my ($self,$transcriptid,$size) = @_;
Graham McVicker's avatar
Graham McVicker committed
302 303 304 305 306 307 308 309 310 311
  if( !defined $transcriptid ) {
    $self->throw("Must have transcriptid id to fetch Slice of transcript");
  }
  if( !defined $size ) {$size=0;}
  
  my $ta = $self->db->get_TranscriptAdaptor;
  my $transcript_obj = $ta->fetch_by_dbID($transcriptid);
  
  my %exon_transforms;
  
312
  my $emptyslice;
Graham McVicker's avatar
Graham McVicker committed
313 314 315 316 317 318 319 320 321 322 323
  for my $exon ( @{$transcript_obj->get_all_Exons()} ) {
    $emptyslice = Bio::EnsEMBL::Slice->new( '-empty'   => 1,
					    '-adaptor' => $self,
					    '-ASSEMBLY_TYPE' =>
					    $self->db->assembly_type);     
    my $newExon = $exon->transform( $emptyslice );
    $exon_transforms{ $exon } = $newExon;
  }
  
  $transcript_obj->transform( \%exon_transforms );
  
324 325
  my $start = $transcript_obj->start() - $size;
  my $end = $transcript_obj->end() + $size;
Graham McVicker's avatar
Graham McVicker committed
326
  
327 328 329
  if($start < 1) {
    $start = 1;
  }
330
  
331 332 333
  my $slice = $self->fetch_by_chr_start_end($emptyslice->chr_name,
					    $start, $end);
  return $slice;
334 335
}

336

Graham McVicker's avatar
Graham McVicker committed
337
=head2 fetch_by_gene_stable_id
338

Graham McVicker's avatar
Graham McVicker committed
339
 Title   : fetch_by_gene_stable_id
Graham McVicker's avatar
Graham McVicker committed
340 341 342 343
 Usage   : $slice = $slc_adptr->fetch_by_gene_stable_id('ENSG00000012123',100);
 Function: Creates a slice around the specified gene.  If a context size is 
           given, the slice is extended by that number of basepairs on either 
           side of the gene.  Throws if the gene is not golden.
344 345 346 347 348 349
 Returns : Slice object 
 Args    : gene id, [context size in bp]


=cut

Graham McVicker's avatar
Graham McVicker committed
350
sub fetch_by_gene_stable_id{
351 352 353 354 355 356 357
   my ($self,$geneid,$size) = @_;

   if( !defined $geneid ) {
       $self->throw("Must have gene id to fetch Slice of gene");
   }
   if( !defined $size ) {$size=0;}

Graham McVicker's avatar
Graham McVicker committed
358
   my ($chr_name,$start,$end) = $self->_get_chr_start_end_of_gene($geneid);
359 360

   if( !defined $start ) {
361
     my $type = $self->db->assembly_type()
Graham McVicker's avatar
Graham McVicker committed
362
       or $self->throw("No assembly type defined");
Graham McVicker's avatar
Graham McVicker committed
363 364
     $self->throw("Gene is not on the golden path '$type'. " .
		  "Cannot build Slice.");
365 366
   }
     
367 368 369 370 371 372 373 374
   $start -= $size;
   $end += $size;
   
   if($start < 1) {
     $start = 1;
   }

   return $self->fetch_by_chr_start_end($chr_name, $start, $end);
375 376 377
}


Graham McVicker's avatar
Graham McVicker committed
378
=head2 fetch_by_chr_name
379

Graham McVicker's avatar
Graham McVicker committed
380 381
 Title   : fetch_by_chr_name
 Usage   : $slice = $slice_adaptor->fetch_by_chr_name('20');
382 383 384 385
 Function: Creates a slice of an entire chromosome. Note that is the start coordinate
           of the chromosome is > 1 ( see assembly table, e.g.: 'select min(chr_start) 
           from assembly where chromosome_id =?') this will put Ns at the beginning of the
           slice sequence.
Graham McVicker's avatar
Graham McVicker committed
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
 Returns : Slice object 
 Args    : chromosome name


=cut

sub fetch_by_chr_name{
   my ($self,$chr_name) = @_;

   unless( $chr_name ) {
       $self->throw("Chromosome name argument required");
   }

   my $chr_start = 1;
   
   #set the end of the slice to the end of the chromosome
   my $ca = $self->db()->get_ChromosomeAdaptor();
403
   my $chromosome = $ca->fetch_by_chr_name($chr_name);
Graham McVicker's avatar
Graham McVicker committed
404 405
   my $chr_end = $chromosome->length();

406 407 408 409 410 411 412 413 414 415 416 417
   my $type = $self->db->assembly_type();

   my $slice = Bio::EnsEMBL::Slice->new
     (
      -chr_name      => $chr_name,
      -chr_start     => 1,
      -chr_end       => $chr_end,
      -assembly_type => $type,
      -adaptor       => $self
     );

   return $slice;
Graham McVicker's avatar
Graham McVicker committed
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
}



=head2 _get_chr_start_end_of_contig

 Title   : _get_chr_start_end_of_contig
 Usage   :
 Function: returns the chromosome name, absolute start and absolute end of the 
           specified contig
 Returns : returns chr,start,end
 Args    : contig id

=cut

sub _get_chr_start_end_of_contig {
    my ($self,$contigid) = @_;
435

Graham McVicker's avatar
Graham McVicker committed
436 437 438 439 440 441 442 443 444 445
   if( !defined $contigid ) {
       $self->throw("Must have contig id to fetch Slice of contig");
   }
   
   my $type = $self->db->assembly_type()
    or $self->throw("No assembly type defined");

   my $sth = $self->db->prepare("SELECT  c.name,
                        a.chr_start,
                        a.chr_end,
446 447
                        chr.name 
                    FROM assembly a, contig c, chromosome chr 
Graham McVicker's avatar
Graham McVicker committed
448 449
                    WHERE c.name = '$contigid' 
                    AND c.contig_id = a.contig_id 
450 451
                    AND a.type = '$type'
                    AND chr.chromosome_id = a.chromosome_id"
Graham McVicker's avatar
Graham McVicker committed
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
                    );
   $sth->execute();
   my ($contig,$start,$end,$chr_name) = $sth->fetchrow_array;

   if( !defined $contig ) {
     $self->throw("Contig $contigid is not on the golden path of type $type");
   }

   return ($chr_name,$start,$end);
}

=head2 _get_chr_start_end_of_gene

 Title   : get_Gene_chr_bp
 Usage   : 
 Function: 
 Returns :  
 Args    :


=cut


sub _get_chr_start_end_of_gene {
  my ($self,$geneid) =  @_;
477
  
Graham McVicker's avatar
Graham McVicker committed
478 479 480 481 482 483 484 485
  my $type = $self->db->assembly_type()
    or $self->throw("No assembly type defined");
  
  my $sth = $self->db->prepare("SELECT  
   if(a.contig_ori=1,(e.contig_start-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_end)),
   if(a.contig_ori=1,(e.contig_end-a.contig_start+a.chr_start),
                    (a.chr_start+a.contig_end-e.contig_start)),
486
     chr.name
Graham McVicker's avatar
Graham McVicker committed
487 488 489 490 491
  
                    FROM    exon e,
                        transcript tr,
                        exon_transcript et,
                        assembly a,
492
                        gene_stable_id gsi,
493
                        chromosome chr
Graham McVicker's avatar
Graham McVicker committed
494 495 496 497 498
                    WHERE e.exon_id=et.exon_id 
                    AND et.transcript_id =tr.transcript_id 
                    AND a.contig_id=e.contig_id 
                    AND a.type = '$type' 
                    AND tr.gene_id = gsi.gene_id
499
                    AND gsi.stable_id = '$geneid'
500
                    AND a.chromosome_id = chr.chromosome_id" 
Graham McVicker's avatar
Graham McVicker committed
501 502
                    );
   $sth->execute();
503

Graham McVicker's avatar
Graham McVicker committed
504 505 506 507 508 509 510 511 512
   my ($start,$end,$chr);
   my @start;
   while ( my @row=$sth->fetchrow_array){
      ($start,$end,$chr)=@row;
       push @start,$start;
       push @start,$end;
   }   
   
   my @start_sorted=sort { $a <=> $b } @start;
513

Graham McVicker's avatar
Graham McVicker committed
514 515
   $start=shift @start_sorted;
   $end=pop @start_sorted;
516

Graham McVicker's avatar
Graham McVicker committed
517
   return ($chr,$start,$end);      
518
}