Newer
Older
# get species name from database if not set
unless ($self->{'_species'}) {
$self->{'_species'} = join('_',
split(/ /, $self->get_species_scientific_name));
}
return $self->{'_species'};
}
=head2 sort_chromosomes

  Arg[1]      : (optional) Hashref $chr_hashref - hashref with chromosome
                names as keys (the values are ignored)
  Example     : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
                my @sorted = $support->sort_chromosomes($chr);
  Description : Sorts chromosome names in an intuitive way (numerically, then
                alphabetically). If no chromosome hashref is passed, it is
                retrieved by calling $self->get_chrlength().
  Return type : List - sorted chromosome names
  Exceptions  : thrown if neither the argument nor $self->get_chrlength()
                yields a plain hashref
  Caller      : general

=cut

sub sort_chromosomes {
    my ($self, $chr_hashref) = @_;

    # fall back to the chromosomes this object knows about
    $chr_hashref = $self->get_chrlength unless ($chr_hashref);

    throw("You have to pass a hashref of your chromosomes")
        unless ($chr_hashref and ref($chr_hashref) eq 'HASH');

    # only the keys take part in the sort; ordering is delegated to the
    # _by_chr_num comparator
    return (sort _by_chr_num keys %$chr_hashref);
}
=head2 _by_chr_num

  Example     : my @sorted = sort _by_chr_num qw(X 6-COX 14 7);
  Description : Comparison subroutine to use in sort for sorting chromosome
                names. Each name is split on '-'; the first part decides the
                order:
                  - names with a purely numeric first part come first, in
                    ascending numerical order
                  - names with a non-numeric first part follow, in string
                    order
                  - for the same number, the bare name (e.g. '6') sorts
                    before suffixed names (e.g. '6-COX'), and suffixed names
                    are ordered by their suffix
                Identical names compare as equal, so the comparator is
                consistent in both directions.
  Return type : -1, 0 or 1, as expected by sort
  Exceptions  : none
  Caller      : internal ($self->sort_chromosomes)

=cut

sub _by_chr_num {
    # $a and $b are the package variables set by sort()
    my ($a_num, @a_suffix) = split /-/, $a;
    my ($b_num, @b_suffix) = split /-/, $b;

    # split returns an empty list for an empty name
    $a_num = '' unless defined $a_num;
    $b_num = '' unless defined $b_num;

    # require at least one digit so that an empty first part is not fed to
    # the numeric comparison below
    my $a_numeric = ($a_num =~ /\A[0-9]+\z/);
    my $b_numeric = ($b_num =~ /\A[0-9]+\z/);

    # two non-numeric names: string order, full name as tie-break
    if (!$a_numeric and !$b_numeric) {
        return (($a_num cmp $b_num) || ($a cmp $b));
    }

    # numeric names always sort before non-numeric ones
    return 1  unless $a_numeric;
    return -1 unless $b_numeric;

    my $by_number = ($a_num <=> $b_num);
    return $by_number if $by_number;

    # same number: compare the suffixes
    if (@a_suffix and @b_suffix) {
        return ((join('-', @a_suffix) cmp join('-', @b_suffix))
                || ($a cmp $b));
    }

    # same number: the bare name goes first
    return -1 if @b_suffix;
    return 1  if @a_suffix;

    # neither has a suffix (e.g. '1' vs '1', or '1' vs '01')
    return ($a cmp $b);
}
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
=head2 split_chromosomes_by_size

  Arg[1]      : (optional) Int $cutoff - the cutoff in bp between small and
                large chromosomes (default: 5000000)
  Example     : my $chr_slices = $support->split_chromosomes_by_size;
                foreach my $block_size (keys %{ $chr_slices }) {
                    print "Chromosomes with blocksize $block_size: ";
                    print join(", ", map { $_->seq_region_name }
                                        @{ $chr_slices->{$block_size} });
                }
  Description : Determines block sizes for storing DensityFeatures on
                chromosomes, and returns slices for each chromosome. The
                block size is determined so that you have 150 bins for the
                smallest chromosome of at least $cutoff bp in length; this
                block size is used for _all_ chromosomes. For chromosomes
                smaller than $cutoff, an additional smaller block size is
                used to yield 150 bins for the overall smallest chromosome.
                This will result in reasonable resolution for small
                chromosomes and high performance for big ones.
                The chromosomes are taken from the 'chromosomes' parameter if
                it is set, otherwise all slices of the 'chromosome'
                coordinate system are used.
  Return type : Hashref (key: block size; value: Arrayref of chromosome
                Bio::EnsEMBL::Slices); undef if there are no chromosomes
  Exceptions  : none
  Caller      : density scripts

=cut

sub split_chromosomes_by_size {
    my $self   = shift;
    my $cutoff = shift || 5000000;

    my $slice_adaptor = $self->dba->get_SliceAdaptor;

    # collect the slices to work on
    my $top_slices;
    if ($self->param('chromosomes')) {
        foreach my $chr ($self->param('chromosomes')) {
            push @{ $top_slices },
                $slice_adaptor->fetch_by_region('chromosome', $chr);
        }
    } else {
        $top_slices = $slice_adaptor->fetch_all('chromosome');
    }

    my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
    foreach my $slice (@{ $top_slices }) {
        my $length = $slice->length;

        if ($length < $cutoff) {
            if (! $min_small_chr or ($min_small_chr > $length)) {
                $min_small_chr = $length;
            }

            # push small chromosomes onto $small_chr
            push @{ $small_chr }, $slice;
        }
        # only chromosomes that reach the cutoff may set the big block size;
        # a small chromosome seen first must not be mistaken for the smallest
        # big one
        elsif (! $min_big_chr or ($min_big_chr > $length)) {
            $min_big_chr = $length;
        }

        # push _all_ chromosomes onto $big_chr
        push @{ $big_chr }, $slice;
    }

    # 150 bins for the smallest chromosome of each class
    my $chr_slices;
    $chr_slices->{int($min_big_chr/150)}   = $big_chr   if $min_big_chr;
    $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;

    return $chr_slices;
}
=head2 log

  Arg[1]      : String $txt - the text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log("Log foo.\n", 1);
  Description : Logs a message to the filehandle initialised by calling
                $self->log_filehandle(). You can supply an indentation level
                to get nice hierarchical log messages; the indentation is
                inserted after any leading linebreaks of the message.
  Return type : true on success
  Exceptions  : thrown when no filehandle can be obtained
  Caller      : general

=cut

sub log {
    my ($self, $txt, $indent) = @_;
    $indent ||= 0;

    # detach leading linebreaks so that the indentation ends up directly in
    # front of the text rather than in front of an empty line
    my $leading_breaks = '';
    if ($txt =~ s/^(\n*)//) {
        $leading_breaks = $1;
    }
    my $padding = " " x $indent;

    my $fh = $self->{'_log_filehandle'};
    throw("Unable to obtain log filehandle") unless $fh;

    my $message = $leading_breaks . $padding . $txt;
    print $fh "$message";

    return(1);
}
=head2 log_warning

  Arg[1]      : String $txt - the warning text to log
  Arg[2]      : Int $indent - indentation level for log message
  Arg[3]      : Bool $break - add a line break before warning if true
  Example     : my $log = $support->log_filehandle;
                $support->log_warning("Log foo.\n", 1);
  Description : Prefixes the message with 'WARNING: ', logs it via
                $self->log and increases the warning counter.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub log_warning {
    my ($self, $txt, $indent, $break) = @_;

    # the optional line break goes in front of the WARNING label
    my $label = $break ? "\nWARNING: " : "WARNING: ";
    $self->log($label . $txt, $indent);

    # keep count of the warnings logged so far
    $self->{'_warnings'}++;

    return(1);
}
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
=head2 log_error

  Arg[1]      : String $txt - the error text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_error("Log foo.\n", 1);
  Description : Prefixes the message with 'ERROR: ', logs it via $self->log,
                logs 'Exiting.' and exits the script.
  Return type : none (does not return)
  Exceptions  : none
  Caller      : general

=cut

sub log_error {
    my ($self, $txt, $indent) = @_;

    my $message = "ERROR: " . $txt;
    $self->log($message, $indent);
    $self->log("Exiting.\n");

    # NOTE(review): a bare exit terminates with status 0, so calling
    # processes cannot tell this apart from a successful run - confirm
    # whether a non-zero status is wanted
    exit;
}
=head2 log_verbose

  Arg[1]      : String $txt - the text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_verbose("Log this verbose message.\n", 1);
  Description : Logs a message via $self->log if the 'verbose' parameter is
                set (--verbose option)
  Return type : TRUE on success, FALSE if not verbose
  Exceptions  : none
  Caller      : general

=cut

sub log_verbose {
    my ($self, $txt, $indent) = @_;

    # stay silent unless verbose logging was requested
    if ($self->param('verbose')) {
        $self->log($txt, $indent);
        return(1);
    }

    return(0);
}
=head2 log_stamped

  Arg[1]      : String $txt - the text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_stamped("Log this stamped message.\n", 1);
  Description : Appends timestamp and memory usage (as returned by
                $self->date_and_mem) to a message and logs it via $self->log.
                Trailing linebreaks of the message are kept at the very end,
                after the stamp.
  Return type : TRUE on success
  Exceptions  : none
  Caller      : general

=cut

sub log_stamped {
    my ($self, $txt, $indent) = @_;

    # take the trailing linebreaks off so that the stamp lands on the same
    # line as the text, and remember them for re-appending
    $txt =~ s/(\n*)$//;
    my $trailing_breaks = $1;

    my $stamp   = $self->date_and_mem;
    my $stamped = $txt . " " . $stamp . $trailing_breaks;

    $self->log($stamped, $indent);

    return(1);
}
=head2 log_filehandle

  Arg[1]      : (optional) String $mode - file access mode
  Example     : my $log = $support->log_filehandle;
                # print to the filehandle
                print $log "Lets start logging...\n";
                # log via the wrapper $self->log()
                $support->log("Another log message.\n");
  Description : Returns a filehandle for logging (STDERR by default, logfile
                if the 'logfile' parameter is set from config or
                commandline). If the 'logpath' parameter is set as well, the
                logfile is created inside that directory, and the directory
                is created if it doesn't exist yet. You can use the
                filehandle directly to print to, or use the smart wrapper
                $self->log().
                Logging mode (truncate or append) can be set by passing the
                mode as an argument to log_filehandle(), or with the
                --logappend commandline option (default: truncate)
  Return type : Filehandle - the filehandle to log to
  Exceptions  : thrown if logfile can't be opened; logs an error and exits
                if the log directory can't be created
  Caller      : general

=cut

sub log_filehandle {
    my ($self, $mode) = @_;
    $mode ||= '>';

    # --logappend takes precedence over the mode passed in
    $mode = '>>' if ($self->param('logappend'));

    my $fh = \*STDERR;

    if (my $logfile = $self->param('logfile')) {
        if (my $logpath = $self->param('logpath')) {
            unless (-e $logpath) {
                # use the builtin rather than shelling out: it copes with
                # spaces and shell metacharacters in the path and sets $!
                unless (mkdir $logpath) {
                    my $error = $!;

                    # log_error() writes via $self->log, which needs a
                    # filehandle - fall back to STDERR if none is set yet
                    $self->{'_log_filehandle'} ||= $fh;
                    $self->log_error(
                        "Can't create log dir $logpath: $error\n");
                }
            }
            $logfile = "$logpath/$logfile";
        }

        # NOTE(review): $fh already refers to the STDERR glob at this point,
        # so this open() appears to reopen STDERR itself onto the logfile
        # (anything written to STDERR afterwards ends up in the log too) -
        # confirm this is intended before changing it
        open($fh, "$mode", $logfile) or throw(
            "Unable to open $logfile for writing: $!");
    }

    $self->{'_log_filehandle'} = $fh;
    return $self->{'_log_filehandle'};
}
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
=head2 filehandle

  Arg[1]      : String $mode - file access mode (default: '>')
  Arg[2]      : String $file - input or output file
  Example     : my $fh = $support->filehandle('>>', '/path/to/file');
                # print to the filehandle
                print $fh "Your text goes here...\n";
  Description : Returns a filehandle to print to or read from. If no file is
                given, *STDOUT is returned for a writing mode and *STDIN for
                a reading mode.
  Return type : Filehandle - the filehandle (undef if no file is given and
                the mode contains neither '>' nor '<')
  Exceptions  : thrown if file can't be opened
  Caller      : general

=cut

sub filehandle {
    my ($self, $mode, $file) = @_;
    $mode ||= ">";

    my $fh;

    unless ($file) {
        # no file: pick the standard stream that matches the mode, checking
        # for a writing mode first
        if ($mode =~ />/) {
            $fh = \*STDOUT;
        } elsif ($mode =~ /</) {
            $fh = \*STDIN;
        }
        return $fh;
    }

    open($fh, "$mode", $file) or throw(
        "Unable to open $file for writing: $!");

    return $fh;
}
=head2 init_log

  Example     : $support->init_log;
  Description : Opens a filehandle to the logfile and prints some header
                information to this file. This includes script name (with
                host and path), date, user running the script and parameters
                the script will be running with. Also records the start time
                that finish_log() uses for its runtime report.
  Return type : Filehandle - the log filehandle
  Exceptions  : none
  Caller      : general

=cut

sub init_log {
    my $self = shift;

    # get a log filehandle
    my $log = $self->log_filehandle;

    # gather host and user from the operating system
    chomp(my $hostname = `hostname`);
    chomp(my $user = `whoami`);

    # $Bin and $Script are not defined in this block - presumably they are
    # imported from FindBin elsewhere in the file
    my $script = "$hostname:$Bin/$Script";

    # print script name, date, user who is running it
    my $header = "Script: $script\nDate: " . $self->date . "\nUser: $user\n";
    $self->log($header);

    # print parameters the script is running with
    $self->log("Parameters:\n\n");
    $self->log($self->list_all_params);

    # remember start time
    $self->{'_start_time'} = time;

    return $log;
}
=head2 finish_log

  Example     : $support->finish_log;
  Description : Writes footer information to a logfile. This includes the
                number of logged warnings, the runtime (only if a start time
                was recorded, see init_log()), timestamp and memory
                footprint.
  Return type : TRUE on success
  Exceptions  : none
  Caller      : general

=cut

sub finish_log {
    my $self = shift;

    my $summary = "\nAll done. " . $self->warnings . " warnings. ";
    $self->log($summary);

    # report the runtime if we know when the script started
    if (my $started = $self->{'_start_time'}) {
        $self->log("Runtime ");

        # break the elapsed seconds down into hours, minutes and seconds
        my $elapsed   = time - $started;
        my $sec       = $elapsed % 60;
        my $total_min = ($elapsed - $sec) / 60;
        my $min       = $total_min % 60;
        my $hours     = ($total_min - $min) / 60;

        $self->log("${hours}h ${min}min ${sec}sec ");
    }

    $self->log($self->date_and_mem . "\n\n");

    return(1);
}
=head2 date_and_mem

  Example     : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
  Description : Returns a timestamp (YYYY-MM-DD hh:mm:ss) and the memory
                usage of your script in the form '[<timestamp>, mem <mem>]'.
  Return type : String - timestamp and memory usage
  Exceptions  : none
  Caller      : general

=cut

sub date_and_mem {
    my $date = strftime "%Y-%m-%d %T", localtime;

    # NOTE(review): the statement that assigns $mem was missing from this
    # copy of the file, leaving $mem undeclared. The ps call below is a
    # reconstruction (virtual memory size of the current process) - confirm
    # against the authoritative source. Platform dependent.
    my $mem = `ps -p $$ -o vsz | tail -1`;
    $mem = '' unless defined $mem;
    chomp $mem;

    # ps right-aligns the column, so drop the padding
    $mem =~ s/^\s+//;
    $mem =~ s/\s+$//;

    return "[$date, mem $mem]";
}
=head2 date

  Example     : print "Date: " . $support->date . "\n";
  Description : Returns a nicely formatted timestamp (YYYY-MM-DD hh:mm:ss)
                of the current local time
  Return type : String - the timestamp
  Exceptions  : none
  Caller      : general

=cut

sub date {
    # broken-down local time, as expected by strftime
    my @now = localtime;

    return strftime("%Y-%m-%d %T", @now);
}
=head2 mem

  Example     : print "Memory usage: " . $support->mem . "\n";
  Description : Returns the memory used by your script. Not sure about
                platform dependence of this call ...
  Return type : String - memory usage
  Exceptions  : none
  Caller      : general

=cut

sub mem {
    # NOTE(review): the statement that assigns $mem was missing from this
    # copy of the file, leaving $mem undeclared. The ps call below is a
    # reconstruction (virtual memory size of the current process) - confirm
    # against the authoritative source. Platform dependent.
    my $mem = `ps -p $$ -o vsz | tail -1`;
    $mem = '' unless defined $mem;
    chomp $mem;

    # ps right-aligns the column, so drop the padding
    $mem =~ s/^\s+//;
    $mem =~ s/\s+$//;

    return $mem;
}
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
=head2 commify

  Arg[1]      : Int $num - a number to commify
  Example     : print "An easy to read number: ".$self->commify(100000000);
                # will print 100,000,000
  Description : Puts commas into a number to make it easier to read. Digits
                after a decimal point are left alone.
  Return type : a string representing the commified number
  Exceptions  : none
  Caller      : general
  Status      : stable

=cut

sub commify {
    my ($self, $num) = @_;

    # work on the reversed string so that groups of three digits can be
    # counted from the right-hand end of the number
    my $backwards = scalar reverse($num);

    # add a comma after each group of three digits that is followed by
    # another digit, unless the group belongs to the fractional part (i.e. a
    # decimal point follows in the reversed string)
    $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $commified = reverse $backwards;
    return $commified;
}
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
=head2 fetch_non_hidden_slices

  Arg[1]      : B::E::AttributeAdaptor
  Arg[2]      : B::E::SliceAdaptor
  Arg[3]      : string $coord_system_name (optional) - 'chromosome' by default
  Arg[4]      : string $coord_system_version (optional) - 'Otter' by default
  Example     : $chroms = $support->fetch_non_hidden_slices($aa, $sa);
  Description : retrieve all slices from a lutra database that are marked as
                not hidden, i.e. that have a 'hidden' attribute with value 0
  Return type : arrayref (empty if there are no such slices)
  Exceptions  : thrown if either adaptor is missing
  Caller      : general
  Status      : stable

=cut

sub fetch_non_hidden_slices {
    my $self = shift;
    my $aa   = shift or throw("You must supply an attribute adaptor");
    my $sa   = shift or throw("You must supply a slice adaptor");
    my $cs   = shift || 'chromosome';
    my $cv   = shift || 'Otter';

    my $visible_chroms = [];

    foreach my $chrom ( @{ $sa->fetch_all($cs, $cv) } ) {
        my $attribs = $aa->fetch_all_by_Slice($chrom);

        # test the attribute value explicitly: asking get_attrib_values for
        # the value 0 does not filter, because 0 is false and is treated
        # like "no value given", which let hidden slices through as well
        my @not_hidden = grep {
            $_->code eq 'hidden' and $_->value eq '0'
        } @$attribs;

        push @$visible_chroms, $chrom if @not_hidden;
    }

    return $visible_chroms;
}
=head2 get_attrib_values

  Arg[1]      : Arrayref of B::E::Attributes
  Arg[2]      : 'code' to search for
  Arg[3]      : 'value' to search for (optional)
  Example     : my $c = $self->get_attrib_values($attribs,'name');
  Description : (i) In the absence of an attribute value argument examines an
                arrayref of B::E::Attributes for a particular attribute type,
                returning the values for each attribute of that type (can
                therefore be used to test for the number of attributes of
                that type).
                (ii) In the presence of the optional value argument, it can
                be used to test for the presence of an attribute with a
                particular value. Any defined value is honoured, including
                false ones such as 0 or the empty string.
  Return type : arrayref of values for that attribute (empty if nothing
                matches)
  Caller      : general
  Status      : stable

=cut

sub get_attrib_values {
    my $self    = shift;
    my $attribs = shift;
    my $code    = shift;
    my $value   = shift;

    # attributes of the requested type
    my @matching = grep { $_->code eq $code } @$attribs;

    # narrow down to the requested value; test for definedness rather than
    # truth so that a value of 0 acts as a filter too
    if (defined $value) {
        @matching = grep { $_->value eq $value } @matching;
    }

    return [ map { $_->value } @matching ];
}
=head2 fix_attrib_value

  Arg[1]      : Arrayref of existing B::E::Attributes
  Arg[2]      : dbID of object
  Arg[3]      : name of object (just for reporting)
  Arg[4]      : attrib_type.code
  Arg[5]      : attrib_type.value
  Arg[6]      : interactive ? (0 by default)
  Arg[7]      : table ('seq_region_attrib' by default)
  Example     : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1);
  Description : adds a new attribute to an object, or updates an existing
                attribute with a new value. Can be run in interactive or
                non-interactive mode (default). In non-interactive mode the
                'interactive' parameter is set to 0 for the duration of the
                call and restored afterwards. Logs a warning and exits if
                there is more than one existing attribute with this code.
  Return type : - arrayref returned by store_new_attribute if a new
                  attribute was stored
                - arrayref returned by update_attribute, with the previous
                  value appended, if an attribute was updated
                - empty arrayref if the attribute already has this value
                - undef if the user declined the change
  Caller      : general
  Status      : only ever tested with seq_region_attributes to date

=cut

sub fix_attrib_value {
    my $self     = shift;
    my $attribs  = shift;
    my $id       = shift;
    my $name     = shift;
    my $code     = shift;
    my $value    = shift;
    my $interact = shift || 0;
    my $table    = shift || 'seq_region_attrib';

    # set interactive parameter, remembering the previous setting
    my $int_before;
    if (! $interact) {
        $int_before = $self->param('interactive');
        $self->param('interactive', 0);
    }

    # get any existing value(s) for this attribute
    my $existings = $self->get_attrib_values($attribs, $code);

    # warn and exit if you're trying to update more than one value for the
    # same attribute
    if (scalar @$existings > 1) {
        $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
        exit;
    }

    my $result;

    # add a new attribute if there is none...
    if (! @$existings) {
        if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
            # pass the table on, otherwise the caller's choice is ignored
            $result = $self->store_new_attribute($id, $code, $value, $table);
        }
    }
    else {
        my $existing = $existings->[0];

        # ...or make no change...
        if ($existing eq $value) {
            $result = [];
        }
        # ...or update an attribute with new values
        elsif ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
            $result = $self->update_attribute($id, $code, $value, $table);
            push @$result, $existing;
        }
    }

    # reset interactive parameter on every path, including the one where
    # the user declined
    $self->param('interactive', $int_before) if (! $interact);

    return $result;
}
=head2 _get_attrib_id

  Arg[1]      : attrib_type.code
  Arg[2]      : database handle
  Example     : $self->_get_attrib_id('name',$dbh)
  Description : get attrib_type.attrib_type_id from an attrib_type.code.
                Logs a warning and exits the script if there is no such
                attrib_type.
  Return type : attrib_type.attrib_type_id
  Caller      : internal
  Status      : stable

=cut

sub _get_attrib_id {
    my ($self, $attrib_code, $dbh) = @_;

    my $sql = join("\n",
        'select attrib_type_id',
        'from attrib_type',
        'where code = ?',
    );

    # the code is bound as a placeholder value
    my ($attrib_id) = $dbh->selectrow_array($sql, {}, $attrib_code);

    return $attrib_id if $attrib_id;

    # no usable id: the attrib_type table needs patching before we can go on
    $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
    exit;
}
=head2 store_new_attribute

  Arg[1]      : seq_region.seq_region_id
  Arg[2]      : attrib_type.code
  Arg[3]      : attrib_type.value
  Arg[4]      : table to update (seq_region_attrib by default)
  Example     : $support->store_new_attribute(23,'name',5);
  Description : uses MySQL to store an entry (code and value) in an attribute
                table (seq_region_attrib by default). The attrib_type_id is
                looked up from the code via $self->_get_attrib_id, using the
                'loutre' database connection.
  Return type : array_ref - 'Stored' and the return value of the insert
  Caller      : general
  Status      : stable

=cut

sub store_new_attribute {
    my ($self, $sr_id, $attrib_code, $attrib_value, $table) = @_;
    $table ||= 'seq_region_attrib';

    # get database handle
    my $dbh = $self->get_dbconnection('loutre');

    # get attrib_type_id for this particular attribute
    my $attrib_id = $self->_get_attrib_id($attrib_code, $dbh);

    # store; the three values are bound in table column order
    my $sql  = "insert into $table\nvalues (?,?,?)";
    my @bind = ($sr_id, $attrib_id, $attrib_value);
    my $r    = $dbh->do($sql, {}, @bind);

    return ['Stored', $r];
}
=head2 update_attribute

  Arg[1]      : seq_region.seq_region_id
  Arg[2]      : attrib_type.code
  Arg[3]      : attrib_type.value
  Arg[4]      : table to update (seq_region_attrib by default)
  Example     : $support->update_attribute(23,'name',5);
  Description : uses MySQL to update an attribute table (seq_region_attrib
                by default). The row is selected by seq_region_id and by the
                attrib_type_id looked up from the code via
                $self->_get_attrib_id, using the 'loutre' database
                connection.
  Return type : array_ref - 'Updated' and the return value of the update
  Caller      : general
  Status      : stable

=cut

sub update_attribute {
    my $self         = shift;
    my $sr_id        = shift;
    my $attrib_code  = shift;
    my $attrib_value = shift;
    my $table        = shift || 'seq_region_attrib';

    # get database handle
    my $dbh = $self->get_dbconnection('loutre');

    # get attrib_type_id for this particular attribute
    my $attrib_id = $self->_get_attrib_id($attrib_code, $dbh);

    # update; all values are bound as placeholders (as in
    # store_new_attribute) instead of being interpolated into the statement
    my $r = $dbh->do(
        qq(update $table
           set value = ?
           where seq_region_id = ?
           and attrib_type_id = ?),
        {},
        ($attrib_value, $sr_id, $attrib_id)
    );

    return ['Updated', $r];
}