#
# EnsEMBL module for Bio::EnsEMBL::CoordSystem
#

=head1 NAME

Bio::EnsEMBL::CoordSystem

=head1 SYNOPSIS

  my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);

  my $csa = $db->get_CoordSystemAdaptor();

  #
  # Get all coord systems in the database:
  #
  foreach my $cs (@{$csa->fetch_all()}) {
    my $str = join ':', $cs->name(),$cs->version(),$cs->dbID();
    print "$str\n";
  }

=head1 DESCRIPTION

This is a simple object which contains a few coordinate system attributes:
name, internal identifier, version.  A coordinate system is uniquely defined
by its name and version.  A version of a coordinate system applies to all
sequences within a coordinate system.  This should not be confused with
individual sequence versions.

Take for example the Human assembly.  The version 'NCBI33' applies to
all chromosomes in the NCBI33 assembly (that is the entire 'chromosome'
coordinate system).  The 'clone' coordinate system in the same database would
have no version however.  Although the clone sequences have their own sequence
versions, there is no version which applies to the entire set of clones.

Coordinate system objects are immutable. Their name and version, and other
attributes may not be altered after they are created.

=head1 AUTHOR - Graham McVicker

=head1 CONTACT

Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk

=head1 METHODS

=cut


use strict;
use warnings;

package Bio::EnsEMBL::CoordSystem;

use Scalar::Util qw(blessed);

use Bio::EnsEMBL::Storable;
use Bio::EnsEMBL::Utils::Argument  qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw);

use vars qw(@ISA);

@ISA = qw(Bio::EnsEMBL::Storable);


=head2 new

  Arg [..]   : List of named arguments:
               -NAME      - The name of the coordinate system.  Required
                            unless TOP_LEVEL is true, in which case it may
                            be omitted and defaults to 'toplevel'.
               -VERSION   - (optional) The version of the coordinate system.
                            Stored as an empty string if not supplied.
               -RANK      - The rank of the coordinate system. The highest
                            level coordinate system should have rank 1, the
                            second highest rank 2 and so on.  An example of
                            a high level coordinate system is 'chromosome';
                            an example of a lower level coordinate system is
                            'clone'.  Must be 0 (or omitted) if TOP_LEVEL
                            is true and non-zero otherwise.
               -TOP_LEVEL - (optional) Sets whether this is a top-level coord
                            system. Default = 0. This should only be set to
                            true if you are creating an artificial toplevel
                            coordsystem by the name of 'toplevel'
               -SEQUENCE_LEVEL - (optional) Sets whether this is a sequence
                            level coordinate system. Default = 0
               -DEFAULT   - (optional)
                            Whether this is the default version of the
                            coordinate systems of this name. Default = 0.
                            Always stored as 0 if TOP_LEVEL is true.
               -DBID      - (optional) The internal identifier of this
                            coordinate system
               -ADAPTOR   - (optional) The adaptor which provides database
                            interaction for this object
  Example    : $cs = Bio::EnsEMBL::CoordSystem->new(-NAME    => 'chromosome',
                                                    -VERSION => 'NCBI33',
                                                    -RANK    => 1,
                                                    -DBID    => 1,
                                                    -ADAPTOR => adaptor,
                                                    -DEFAULT => 1,
                                                    -SEQUENCE_LEVEL => 0);
  Description: Creates a new CoordSystem object representing a coordinate
               system.  The full argument list is also handed to the
               superclass constructor.
  Returntype : Bio::EnsEMBL::CoordSystem
  Exceptions : thrown if TOP_LEVEL is true and RANK is non-zero, NAME is
               given but is not 'toplevel', or SEQUENCE_LEVEL is true;
               thrown if TOP_LEVEL is false and NAME is missing, NAME is
               'toplevel', or RANK is missing or zero;
               thrown if RANK is not a string of digits
  Caller     : general

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);

  my ($name, $version, $top_level, $sequence_level, $default, $rank) =
    rearrange(['NAME','VERSION','TOP_LEVEL', 'SEQUENCE_LEVEL',
               'DEFAULT', 'RANK'], @_);

  $version = '' if(!defined($version));

  # Normalise the flags to strict 0/1 values so the is_* getters can
  # promise "0 or 1".
  $top_level       = ($top_level)      ? 1 : 0;
  $sequence_level  = ($sequence_level) ? 1 : 0;
  $default         = ($default)        ? 1 : 0;
  $rank ||= 0;

  if($top_level) {
    if($rank) {
      throw('RANK argument must be 0 if TOP_LEVEL is 1');
    }

    # NAME is optional for the toplevel pseudo coord system; it is only
    # validated when supplied and otherwise defaults to 'toplevel'.
    if($name) {
      if($name ne 'toplevel') {
        throw('The NAME argument must be "toplevel" if TOP_LEVEL is 1');
      }
    } else {
      $name = 'toplevel';
    }

    if($sequence_level) {
      throw("SEQUENCE_LEVEL argument must be 0 if TOP_LEVEL is 1");
    }

    # The toplevel pseudo coord system is never a default version.
    $default = 0;

  } else {
    # NAME is only mandatory for real (non-toplevel) coord systems.
    if(!$name) {
      throw('The NAME argument is required');
    }
    if(!$rank) {
      throw("RANK argument must be non-zero if not toplevel CoordSystem");
    }
    if($name eq 'toplevel') {
      throw("Cannot name coord system 'toplevel' unless TOP_LEVEL is 1");
    }
  }

  # \A and \z (rather than ^ and $) so that a value with a trailing
  # newline such as "1\n" is rejected instead of being stored verbatim.
  if($rank !~ /\A\d+\z/) {
    throw('The RANK argument must be a positive integer');
  }

  $self->{'version'} = $version;
  $self->{'name'} = $name;
  $self->{'top_level'} = $top_level;
  $self->{'sequence_level'} = $sequence_level;
  $self->{'default'} = $default;
  $self->{'rank'}    = $rank;

  return $self;
}


=head2 name

  Arg [1]    : none
  Example    : print $coord_system->name();
  Description: Read-only accessor returning the name of this coordinate
               system as stored on the object.
  Returntype : string
  Exceptions : none
  Caller     : general

=cut

sub name {
  my ($self) = @_;

  my $cs_name = $self->{'name'};
  return $cs_name;
}



=head2 version

  Arg [1]    : none
  Example    : print $coord->version();
  Description: Read-only accessor returning the version of this coordinate
               system.  A coordinate system created without a version
               stores, and therefore returns, an empty string.
  Returntype : string
  Exceptions : none
  Caller     : general

=cut

sub version {
  my ($self) = @_;

  my $cs_version = $self->{'version'};
  return $cs_version;
}




=head2 equals

  Arg [1]    : Bio::EnsEMBL::CoordSystem $cs
               The coord system to compare to for equality.
  Example    : if($coord_sys->equals($other_coord_sys)) { ... }
  Description: Compares 2 coordinate systems and returns true if they are
               equivalent.  The definition of equivalent is sharing the same
               name and version.
  Returntype : 0 or 1
  Exceptions : thrown if the argument is not a Bio::EnsEMBL::CoordSystem
               object (including undef, plain strings and unblessed
               references)
  Caller     : general

=cut

sub equals {
  my ($self, $cs) = @_;

  # blessed() is tested before ->isa() so that an unblessed reference is
  # rejected with the documented exception rather than a raw
  # "Can't call method on unblessed reference" error.
  if(!defined(blessed($cs)) || !$cs->isa('Bio::EnsEMBL::CoordSystem')) {
    throw('Argument must be a Bio::EnsEMBL::CoordSystem');
  }

  if($self->{'version'} eq $cs->version() && $self->{'name'} eq $cs->name()) {
    return 1;
  }

  return 0;
}




=head2 is_top_level

  Arg [1]    : none
  Example    : if($coord_sys->is_top_level()) { ... }
  Description: Returns true if this is the toplevel pseudo coordinate system.
               The toplevel coordinate system is not a real coordinate system
               which is stored in the database, but it is a placeholder that
               can be used to request transformations or retrievals to/from
               the highest defined coordinate system in a given region.
  Returntype : 0 or 1
  Exceptions : none
  Caller     : general

=cut

sub is_top_level {
  my ($self) = @_;

  my $flag = $self->{'top_level'};
  return $flag;
}


=head2 is_sequence_level

  Arg [1]    : none
  Example    : if($coord_sys->is_sequence_level()) { ... }
  Description: Read-only accessor for the sequence level flag.  True if
               this coordinate system was created as a sequence level
               coordinate system.
  Returntype : 0 or 1
  Exceptions : none
  Caller     : general

=cut

sub is_sequence_level {
  my ($self) = @_;

  my $flag = $self->{'sequence_level'};
  return $flag;
}


=head2 is_default

  Arg [1]    : none
  Example    : if($coord_sys->is_default()) { ... }
  Description: Read-only accessor for the default flag.  True if this
               coordinate system is the default version among the
               coordinate systems sharing its name.
  Returntype : 0 or 1
  Exceptions : none
  Caller     : general

=cut

sub is_default {
  my ($self) = @_;

  my $flag = $self->{'default'};
  return $flag;
}




=head2 rank

  Arg [1]    : none
  Example    : if($cs1->rank() < $cs2->rank()) {
                 print $cs1->name(), " is a higher level coord system than",
                       $cs2->name(), "\n";
               }
  Description: Read-only accessor for the rank of this coordinate system.
               Smaller numbers denote higher level coordinate systems: the
               highest level real coordinate system (e.g. 'chromosome') has
               rank 1, while the toplevel pseudo coordinate system has
               rank 0.
  Returntype : int
  Exceptions : none
  Caller     : general

=cut

sub rank {
  my ($self) = @_;

  my $cs_rank = $self->{'rank'};
  return $cs_rank;
}

1;