MappedSliceContainer.pm 17.3 KB
Newer Older
1 2
=head1 LICENSE

3
  Copyright (c) 1999-2010 The European Bioinformatics Institute and
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20 21 22 23 24 25 26

=head1 NAME

Bio::EnsEMBL::MappedSliceContainer - container for mapped slices

=head1 SYNOPSIS

27 28 29
  # get a reference slice
  my $slice =
    $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
30

31 32
  # create MappedSliceContainer based on the reference slice
  my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
33

34 35 36
  # set the adaptor for fetching AssemblySlices
  my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
  $msc->set_AssemblySliceAdaptor($asa);
37

38 39
  # add an AssemblySlice to your MappedSliceContainer
  $msc->attach_AssemblySlice('NCBIM36');
40

41 42 43 44 45 46
  foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
    print $mapped_slice->name, "\n";

    foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
      print "  ", &to_string($sf), "\n";
    }
47
  }
48 49 50

=head1 DESCRIPTION

51 52
NOTE: this code is under development and not fully functional nor tested
yet.  Use only for development.
53 54

A MappedSliceContainer holds a collection of one or more
55 56 57 58 59 60 61 62 63 64 65 66
Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and
contains an artificial "container slice" which defines the common
coordinate system used by all attached MappedSlices. There is also a
mapper to convert coordinates between the reference and the container
slice.

Attaching MappedSlices to the container is delegated to adaptors
(which act more as object factories than as traditional Ensembl db
adaptors). The adaptors will also modify the container slice and
associated mapper if required. This design allows us to keep the
MappedSliceContainer generic and encapsulate the data source specific
code in the adaptor/factory module.
67 68

In the simplest use case, all required MappedSlices are attached to the
69 70 71 72 73 74
MappedSliceContainer at once (by a single call to the adaptor). This
object should also allow "hot-plugging" of MappedSlices (e.g. attach a
MappedSlice representing a strain to a container that already contains a
multi-species alignment). The methods for attaching new MappedSlice will
be responsible for making the necessary adjustments to coordinates and
mapper on the existing MappedSlices.
75 76 77

=head1 METHODS

78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
  new
  set_adaptor
  get_adaptor
  set_AssemblySliceAdaptor
  get_AssemblySliceAdaptor
  set_AlignSliceAdaptor (not implemented yet)
  get_AlignSliceAdaptor (not implemented yet)
  set_StrainSliceAdaptor
  get_StrainSliceAdaptor
  attach_AssemblySlice
  attach_AlignSlice (not implemented yet)
  attach_StrainSlice
  get_all_MappedSlices
  sub_MappedSliceContainer (not implemented yet)
  ref_slice
  container_slice
  mapper
  expanded
  seq

=head1 RELATED MODULES

  Bio::EnsEMBL::MappedSlice
  Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
  Bio::EnsEMBL::Compara::AlignSlice
  Bio::EnsEMBL::Compara::AlignSlice::Slice
  Bio::EnsEMBL::AlignStrainSlice
  Bio::EnsEMBL::StrainSlice
105 106 107

=cut

108
package Bio::EnsEMBL::MappedSliceContainer;
109 110 111 112 113 114 115

use strict;
use warnings;
no warnings 'uninitialized';

use Scalar::Util qw(blessed);

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::CoordSystem;
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Mapper;
119 120 121 122 123 124


# define the available adaptor types that can be used with this container
my %adaptors = map { $_ => 1 } qw(assembly align strain);


125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
=head2 new

  Arg [SLICE]     : Bio::EnsEMBL::Slice $slice - the reference slice for this
                    container
  Arg [EXPANDED]  : (optional) Boolean $expanded - set expanded mode (default:
                    collapsed)
  Example     : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1,
                  9000000, 9500000);
                my $msc = Bio::EnsEMBL::MappedSliceContainer->new(
                    -SLICE    => $slice,
                    -EXPANDED => 1,
                );
  Description : Constructor. See the general documentation of this module for 
                details about this object. Note that the constructor creates an
                empty container, so you'll have to attach MappedSlices to it to
                be useful (this is usually done by an adaptor/factory).
  Return type : Bio::EnsEMBL::MappedSliceContainer
  Exceptions  : thrown on wrong or missing argument
  Caller      : general
  Status      : At Risk
              : under development

=cut

149 150 151 152
# Constructor: validates the reference slice, stores the initial state and
# builds the container slice plus the reference <-> container mapper.
sub new {
  my ($caller, @args) = @_;
  my $class = ref($caller) || $caller;

  my ($ref_slice, $expanded) = rearrange([qw(SLICE EXPANDED)], @args);

  # argument check; blessed() guards the isa() calls so that an unblessed
  # reference (e.g. a plain hashref) is rejected with the exception below
  # rather than a raw "Can't call method" error
  unless (blessed($ref_slice) and
          ($ref_slice->isa('Bio::EnsEMBL::Slice') or
           $ref_slice->isa('Bio::EnsEMBL::LRGSlice'))) {
    throw("You must provide a reference slice.");
  }

  # initialise object; the container starts without any MappedSlices and in
  # collapsed mode unless -EXPANDED was given a true value
  my $self = bless {
    'ref_slice'     => $ref_slice,
    'expanded'      => $expanded || 0,
    'mapped_slices' => [],
  }, $class;

  # create the container slice (this also sets up the mapper)
  $self->_create_container_slice($ref_slice);

  return $self;
}


177 178 179 180 181 182 183 184 185 186
#
# Create an artificial slice which represents the common coordinate system used
# for this MappedSliceContainer
#
# Builds the artificial container slice for $ref_slice and a mapper between
# the reference slice and the container coordinate system; both are stored on
# the object. Throws if $ref_slice is not a Slice/LRGSlice object. Returns the
# newly created mapper.
sub _create_container_slice {
  my ($self, $ref_slice) = @_;

  # argument check; blessed() keeps the isa() calls from dying on unblessed
  # references
  unless (blessed($ref_slice) and
          ($ref_slice->isa('Bio::EnsEMBL::Slice') or
           $ref_slice->isa('Bio::EnsEMBL::LRGSlice'))) {
    throw("You must provide a reference slice.");
  }

  # create an artificial coordinate system for the container slice
  my $cs = Bio::EnsEMBL::CoordSystem->new(
      -NAME => 'container',
      -RANK => 1,
  );

  # Create a new artificial slice spanning your container. Initially this will
  # simply span your reference slice
  my $container_slice = Bio::EnsEMBL::Slice->new(
      -COORD_SYSTEM     => $cs,
      -START            => 1,
      -END              => $ref_slice->length,
      -STRAND           => 1,
      -SEQ_REGION_NAME  => 'container',
  );

  $self->{'container_slice'} = $container_slice;

  # Create a Mapper to map to/from the reference slice to the container coord
  # system.
  my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');

  # initially a single 1:1 block: the whole reference slice corresponds to the
  # whole container slice
  $mapper->add_map_coordinates(
      $ref_slice->seq_region_name,
      $ref_slice->start,
      $ref_slice->end,
      1,
      $container_slice->seq_region_name,
      $container_slice->start,
      $container_slice->end,
  );

  $self->{'mapper'} = $mapper;

  return $mapper;
}


=head2 set_adaptor
228

229 230 231 232 233 234 235 236 237
  Arg[1]      : String $type - the type of adaptor to set
  Arg[2]      : Adaptor $adaptor - the adaptor to set
  Example     : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
                $msc->set_adaptor('assembly', $adaptor);
  Description : Parameterisable wrapper for all methods that set adaptors (see
                below).
  Return type : same as Arg 2
  Exceptions  : thrown on missing type
  Caller      : general
238 239 240 241 242 243 244 245
  Status      : At Risk
              : under development

=cut

# Generic setter: dispatches to set_<Type>SliceAdaptor for one of the adaptor
# types registered in %adaptors and returns whatever that setter returns.
sub set_adaptor {
  my ($self, $type, $adaptor) = @_;

  # argument check
  unless ($type and $adaptors{$type}) {
    throw("Missing or unknown adaptor type.");
  }

  # e.g. 'assembly' -> set_AssemblySliceAdaptor
  my $method = 'set_' . ucfirst($type) . 'SliceAdaptor';

  return $self->$method($adaptor);
}


260
=head2 get_adaptor
261

262 263 264 265 266 267 268
  Arg[1]      : String $type - the type of adaptor to get
  Example     : my $assembly_slice_adaptor = $msc->get_adaptor('assembly');
  Description : Parameterisable wrapper for all methods that get adaptors (see
                below).
  Return type : An adaptor for the requested type of MappedSlice.
  Exceptions  : thrown on missing type
  Caller      : general
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
  Status      : At Risk
              : under development

=cut

# Generic getter: dispatches to get_<Type>SliceAdaptor for one of the adaptor
# types registered in %adaptors.
sub get_adaptor {
  my ($self, $type) = @_;

  # reject anything that is not a registered adaptor type
  throw("Missing or unknown adaptor type.")
    unless ($type and $adaptors{$type});

  # e.g. 'assembly' -> get_AssemblySliceAdaptor
  my $getter = sprintf('get_%sSliceAdaptor', ucfirst($type));

  return $self->$getter;
}


290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
=head2 set_AssemblySliceAdaptor

  Arg[1]      : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set
  Example     : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
                $msc->set_AssemblySliceAdaptor($adaptor);
  Description : Sets an AssemblySliceAdaptor for this container. The adaptor can
                be used to attach MappedSlice for alternative assemblies.
  Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
  Exceptions  : thrown on wrong or missing argument
  Caller      : general, $self->get_adaptor
  Status      : At Risk
              : under development

=cut

305 306 307 308
# Stores the AssemblySliceAdaptor on the container and returns it. Throws
# unless the argument is a Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor object.
sub set_AssemblySliceAdaptor {
  my ($self, $assembly_slice_adaptor) = @_;

  # blessed() guards the isa() call so unblessed references are rejected with
  # the exception below; the message names the class that is actually checked
  unless (blessed($assembly_slice_adaptor) and
    $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) {
      throw("Need a Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor.");
  }

  return $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor;
}


318 319 320 321 322 323 324 325 326 327 328 329 330
=head2 get_AssemblySliceAdaptor

  Example     : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor;
  Description : Gets the AssemblySliceAdaptor from this container. The adaptor
                can be used to attach MappedSlice for alternative assemblies.
  Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
  Exceptions  : none (a warning is issued if no adaptor is attached)
  Caller      : general, $self->get_adaptor
  Status      : At Risk
              : under development

=cut

331 332 333 334 335 336 337 338 339 340 341
# Returns the AssemblySliceAdaptor stored on this container; warns (but does
# not fail) when none has been set.
sub get_AssemblySliceAdaptor {
  my ($self) = @_;

  my $asa = $self->{'adaptors'}->{'AssemblySlice'};

  # a missing adaptor is reported, the caller still gets the (false) value
  warning("No AssemblySliceAdaptor attached to MappedSliceContainer.")
    if (!$asa);

  return $asa;
}


342
# [todo]
343 344 345 346 347
# Placeholder for the AlignSlice adaptor setter: the body unconditionally
# calls throw() with a "not implemented" message.
sub set_AlignSliceAdaptor {
  throw("Not implemented yet!");
}


348
# [todo]
349 350 351 352 353
# Placeholder for the AlignSlice adaptor getter: the body unconditionally
# calls throw() with a "not implemented" message.
sub get_AlignSliceAdaptor {
  throw("Not implemented yet!");
}


354
# Stores the StrainSliceAdaptor on the container and returns it. Throws unless
# the argument is a Bio::EnsEMBL::DBSQL::StrainSliceAdaptor object.
sub set_StrainSliceAdaptor {
  my ($self, $strain_slice_adaptor) = @_;

  # blessed() guards the isa() call so unblessed references are rejected with
  # the exception below; the message names the class that is actually checked
  unless (blessed($strain_slice_adaptor) and
    $strain_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::StrainSliceAdaptor')) {
      throw("Need a Bio::EnsEMBL::DBSQL::StrainSliceAdaptor.");
  }

  return $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor;
}


368
# Returns the StrainSliceAdaptor stored on this container; warns (but does not
# fail) when none has been set.
sub get_StrainSliceAdaptor {
  my $self = shift;

  unless ($self->{'adaptors'}->{'StrainSlice'}) {
    warning("No StrainSliceAdaptor attached to MappedSliceContainer.");
  }

  return $self->{'adaptors'}->{'StrainSlice'};
}


380 381 382 383 384 385 386 387 388 389 390 391 392 393
=head2 attach_AssemblySlice

  Arg[1]      : String $version - assembly version to attach
  Example     : $msc->attach_AssemblySlice('NCBIM36');
  Description : Attaches a MappedSlice for an alternative assembly to this
                container.
  Return type : none
  Exceptions  : thrown on missing argument
  Caller      : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
  Status      : At Risk
              : under development

=cut

394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
# Fetches the MappedSlices for assembly $version through the container's
# AssemblySliceAdaptor and appends them to the list of attached slices.
sub attach_AssemblySlice {
  my ($self, $version) = @_;

  if (!$version) {
    throw("Need a version.");
  }

  # without an adaptor there is nothing to fetch from
  my $adaptor = $self->get_AssemblySliceAdaptor
    or return;

  # the adaptor hands back a listref of slices for this container
  my @new_slices = @{ $adaptor->fetch_by_version($self, $version) };

  push @{ $self->{'mapped_slices'} }, @new_slices;
}


William McLaren's avatar
William McLaren committed
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
=head2 attach_StrainSlice

  Arg[1]      : String $strain - name of strain to attach
  Example     : $msc->attach_StrainSlice('Watson');
  Description : Attaches a MappedSlice for an alternative strain to this
                container.
  Return type : none
  Exceptions  : thrown on missing argument
  Caller      : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
  Status      : At Risk
              : under development

=cut

# Fetches the MappedSlices for the named strain through the container's
# StrainSliceAdaptor and appends them to the list of attached slices.
sub attach_StrainSlice {
  my ($self, $strain) = @_;

  if (!$strain) {
    throw("Need a strain.");
  }

  # without an adaptor there is nothing to fetch from
  my $adaptor = $self->get_StrainSliceAdaptor
    or return;

  # the adaptor hands back a listref of slices for this container
  my @new_slices = @{ $adaptor->fetch_by_name($self, $strain) };

  push @{ $self->{'mapped_slices'} }, @new_slices;
}



439
=head2 get_all_MappedSlices
440

441 442 443 444 445 446 447 448 449 450 451
  Example     : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) {
                  print $mapped_slice->name, "\n";
                }
  Description : Returns all MappedSlices attached to this container.
  Return type : listref of Bio::EnsEMBL::MappedSlice
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut
452 453 454 455 456 457 458

# Returns the listref holding every MappedSlice attached so far (the stored
# reference itself, not a copy).
sub get_all_MappedSlices {
  my ($self) = @_;

  my $attached = $self->{'mapped_slices'};

  return $attached;
}


459
# [todo]
460 461 462 463 464
# Placeholder: the body unconditionally calls throw() with a
# "not implemented" message.
sub sub_MappedSliceContainer {
  throw("Not implemented yet!");
}


465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
=head2 ref_slice

  Arg[1]      : (optional) Bio::EnsEMBL::Slice - the reference slice to set
  Example     : my $ref_slice = $mapped_slice_container->ref_slice;
                print "This MappedSliceContainer is based on the reference
                  slice ", $ref_slice->name, "\n";
  Description : Getter/setter for the reference slice.
  Return type : Bio::EnsEMBL::Slice
  Exceptions  : thrown on wrong argument type
  Caller      : general
  Status      : At Risk
              : under development

=cut

# Getter/setter for the reference slice. When an argument is passed it must be
# a Bio::EnsEMBL::Slice or Bio::EnsEMBL::LRGSlice object, otherwise an
# exception is thrown. Returns the stored reference slice.
sub ref_slice {
  my $self = shift;

  if (@_) {
    my $slice = shift;

    # blessed() guards the isa() calls so that undef, plain strings and
    # unblessed references are all rejected with the exception below
    unless (blessed($slice) and
            ($slice->isa('Bio::EnsEMBL::Slice') or
             $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
      throw("Need a Bio::EnsEMBL::Slice.");
    }

    $self->{'ref_slice'} = $slice;
  }

  return $self->{'ref_slice'};
}


=head2 container_slice

  Arg[1]      : (optional) Bio::EnsEMBL::Slice - the container slice to set
  Example     : my $container_slice = $mapped_slice_container->container_slice;
                print "The common slice used by this MappedSliceContainer is ",
                  $container_slice->name, "\n";
  Description : Getter/setter for the container slice. This is an artificial
                slice which defines the common coordinate system used by the
                MappedSlices attached to this container.
  Return type : Bio::EnsEMBL::Slice
  Exceptions  : thrown on wrong argument type
  Caller      : general
  Status      : At Risk
              : under development

=cut

# Getter/setter for the artificial container slice. When an argument is passed
# it must be a Bio::EnsEMBL::Slice or Bio::EnsEMBL::LRGSlice object, otherwise
# an exception is thrown. Returns the stored container slice.
sub container_slice {
  my $self = shift;

  if (@_) {
    my $slice = shift;

    # blessed() guards the isa() calls so that undef, plain strings and
    # unblessed references are all rejected with the exception below
    unless (blessed($slice) and
            ($slice->isa('Bio::EnsEMBL::Slice') or
             $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
      throw("Need a Bio::EnsEMBL::Slice.");
    }

    $self->{'container_slice'} = $slice;
  }

  return $self->{'container_slice'};
}


=head2 mapper

  Arg[1]      : (optional) Bio::EnsEMBL::Mapper - the mapper to set
  Example     : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped');
                $mapped_slice_container->mapper($mapper);
  Description : Getter/setter for the mapper to map between reference slice and
                the artificial container coord system.
  Return type : Bio::EnsEMBL::Mapper
  Exceptions  : thrown on wrong argument type
  Caller      : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD
  Status      : At Risk
              : under development

=cut

# Getter/setter for the mapper between the reference slice and the container
# coordinate system. When an argument is passed it must be a
# Bio::EnsEMBL::Mapper object, otherwise an exception is thrown. Returns the
# stored mapper.
sub mapper {
  my $self = shift;

  if (@_) {
    my $mapper = shift;

    # blessed() guards the isa() call so that undef, plain strings and
    # unblessed references are all rejected with the exception below
    unless (blessed($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
      throw("Need a Bio::EnsEMBL::Mapper.");
    }

    $self->{'mapper'} = $mapper;
  }

  return $self->{'mapper'};
}


=head2 expanded

  Arg[1]      : (optional) Boolean - expanded mode to set
  Example     : if ($mapped_slice_container->expanded) {
                  # do more elaborate mapping than in collapsed mode
                  [...]
                }
  Description : Getter/setter for expanded mode.
                
                By default, MappedSliceContainer use collapsed mode, which
                means that no inserts in the reference sequence are allowed
                when constructing the MappedSlices. In this mode, the
                mapped_slice artificial coord system will be identical with the
                ref_slice coord system.
                
                By setting expanded mode, you allow inserts in the reference
                sequence.
  Return type : Boolean
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

# Getter/setter for the expanded-mode flag; any passed value (including a
# false one) replaces the stored flag.
sub expanded {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'expanded'} = $new_value[0];
  }

  return $self->{'expanded'};
}

594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
=head2 seq

  Example     : my $seq = $container->seq()
  Description : Retrieves the expanded sequence of the artificial container
                slice, including "-" characters where there are inserts in any
                of the attached mapped slices.
  Return type : String
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

# Builds the container sequence by projecting the reference slice through the
# mapper: ordinary coordinates contribute the next stretch of reference
# sequence, IndelCoordinates contribute a run of "-" of the same length.
sub seq {
  my ($self) = @_;

  # without a mapper there is nothing to project
  return '' unless (defined($self->mapper));

  my $offset    = 0;    # read position within the reference sequence
  my $ref_slice = $self->ref_slice();
  my $ref_seq   = $ref_slice->seq();

  my @coords = $self->mapper->map_coordinates(
    $ref_slice->seq_region_name,
    $ref_slice->start,
    $ref_slice->end,
    $ref_slice->strand,
    'ref_slice',
  );

  my @pieces;

  for my $coord (@coords) {
    # indels become padding and do not consume reference sequence
    if ($coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) {
      push @pieces, '-' x $coord->length();
      next;
    }

    # everything else copies the next chunk of the reference sequence
    push @pieces, substr($ref_seq, $offset, $coord->length());
    $offset += $coord->length;
  }

  return join('', @pieces);
}

636

637 638
1;