=pod

=head1 LICENSE

  Copyright (c) 1999-2013 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Pipeline::ChecksumGenerator

=head1 DESCRIPTION

Creates a CHECKSUMS file in the given directory which is produced from running
the sum command over every file in the directory. This excludes the CHECKSUMS
file, parent directory or any hidden files.

Allowed parameters are:

=over 8

=item dir - The directory to generate checksums for

=item gzip - If the resulting file should be gzipped. Defaults to false

=back

=cut

package Bio::EnsEMBL::Pipeline::ChecksumGenerator;

use strict;
use warnings;

use base qw/Bio::EnsEMBL::Pipeline::Base/;

use File::Spec;
use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;

# Supplies the default parameter values for this module.
#
# Returns a hash reference with a single key, 'gzip', set to 0 so that the
# CHECKSUMS file is written uncompressed unless the caller asks otherwise.
sub param_defaults {
  my ($self) = @_;
  my %defaults = (gzip => 0);
  return \%defaults;
}

# Validates the 'dir' parameter before any work is done.
#
# Calls $self->throw when 'dir' is missing/false, and again when the value
# does not name an existing directory. Returns nothing.
sub fetch_input {
  my ($self) = @_;
  my $dir = $self->param('dir');

  if (! $dir) {
    $self->throw("No 'dir' parameter specified");
  }

  if (! -d $dir) {
    $self->throw("Dir $dir does not exist");
  }

  return;
}

# Builds the list of checksums for the directory named by the 'dir' parameter.
#
# Directory entries are processed in string-sorted order. Entries whose name
# starts with a dot (hidden files, '.' and '..') or with 'CHECKSUM' are
# skipped, as is anything that is not a regular file (for example a
# sub-directory), since the sum command cannot checksum those.
#
# The result is stored in the 'checksums' parameter as an array reference of
# [checksum, filename] pairs. Dies if the directory cannot be opened or closed.
sub run {
  my ($self) = @_;
  my @checksums;

  my $dir = $self->param('dir');
  $self->info('Checksumming directory %s', $dir);

  opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
  my @files = sort { $a cmp $b } readdir($dh);
  closedir($dh) or die "Cannot close directory $dir: $!";

  foreach my $file (@files) {
    next if $file =~ /^\./;         #hidden file or up/current dir
    next if $file =~ /^CHECKSUM/;   #never checksum our own output
    my $path = File::Spec->catfile($dir, $file);
    # Only regular files (or symlinks to them) can be checksummed; passing a
    # directory to sum would otherwise yield an empty checksum entry.
    next unless -f $path;
    my $checksum = $self->checksum($path);
    push(@checksums, [$checksum, $file]);
  }

  $self->param('checksums', \@checksums);
  return;
}

# Writes the collected checksums to a CHECKSUMS file inside 'dir'.
#
# The file is named CHECKSUMS, or CHECKSUMS.gz when the 'gzip' parameter is
# true. Any existing file of that name is removed first. When there are no
# checksums nothing is written and the sub returns early. Each entry is
# written as one tab-separated line, and the finished file is passed to
# $self->permissions.
sub write_output {
  my ($self) = @_;

  my $target = File::Spec->catfile($self->param('dir'), 'CHECKSUMS');
  if ($self->param('gzip')) {
    $target = $target . '.gz';
  }

  if (-f $target) {
    $self->info('Checksum file already exists. Removing');
    unlink $target;
  }

  my @rows = @{$self->param('checksums')};
  if (! @rows) {
    return;
  }

  # Callback handed to the IO helper; receives the open file handle.
  my $emit_rows = sub {
    my ($fh) = @_;
    for my $row (@rows) {
      print {$fh} join(qq{\t}, @{$row});
      print {$fh} "\n";
    }
    return;
  };

  if ($self->param('gzip')) {
    gz_work_with_file($target, 'w', $emit_rows);
  }
  else {
    work_with_file($target, 'w', $emit_rows);
  }

  $self->permissions($target);
  return;
}

# Runs the external sum command on a single file and returns its output with
# the file path and trailing newline stripped (i.e. the checksum and block
# count as printed by sum).
#
# Arguments : $path - path of the file to checksum
# Returns   : String holding the sum output without the path
# Throws    : via $self->throw if the sum command cannot be started
#
# The command is started with the list form of a piped open so that no shell
# is involved: paths containing spaces or shell metacharacters are passed to
# sum verbatim. A non-zero exit status from sum is not treated as fatal; in
# that case whatever was printed on its standard output (possibly nothing)
# is returned.
sub checksum {
  my ($self, $path) = @_;

  open(my $pipe, '-|', 'sum', $path)
    or $self->throw("Cannot run sum on $path: $!");
  my $checksum = do { local $/ = undef; <$pipe> };
  # The close status only reflects sum's exit code, which is deliberately
  # not escalated to an error here.
  close($pipe);
  $checksum = q{} unless defined $checksum;

  # \Q..\E makes the path match literally, even when it contains regex
  # metacharacters, whitespace or '#' (all significant under /x).
  $checksum =~ s/\s* \Q$path\E//xms;
  chomp($checksum);
  return $checksum;
}

# Makes the given file readable and writable by everyone (mode 0666).
#
# Arguments : $file - path of the file to chmod
# Returns   : nothing
# Throws    : via $self->throw if chmod fails; the message reports the mode
#             in octal together with the system error.
sub permissions {
  my ($self, $file) = @_;
  my $mode = 0666; ## no critic
  chmod($mode, $file)
    or $self->throw(sprintf('Cannot perform the chmod to mode %04o for file %s: %s', $mode, $file, $!));
  return;
}

1;