vega_percent_gc_calc.pl 6.29 KB
Newer Older
1
#!/usr/local/bin/perl
2

3
=head1 NAME
4

5
vega_percent_gc_calc.pl - calculate GC content
6

7
=head1 SYNOPSIS
8

9
vega_percent_gc_calc.pl [options]
10

11 12 13
General options:
    --conffile, --conf=FILE             read parameters from FILE
                                        (default: conf/Conversion.ini)
14

15 16 17 18 19 20 21 22 23 24 25 26
    --dbname, db_name=NAME              use database NAME
    --host, --dbhost, --db_host=HOST    use database host HOST
    --port, --dbport, --db_port=PORT    use database port PORT
    --user, --dbuser, --db_user=USER    use database username USER
    --pass, --dbpass, --db_pass=PASS    use database passwort PASS
    --logfile, --log=FILE               log to FILE (default: *STDOUT)
    --logpath=PATH                      write logfile to PATH (default: .)
    --logappend, --log_append           append to logfile (default: truncate)
    -v, --verbose                       verbose logging (default: false)
    -i, --interactive=0|1               run script interactively (default: true)
    -n, --dry_run, --dry=0|1            don't write results to database
    -h, --help, -?                      print help (this message)
27

Steve Trevanion's avatar
Steve Trevanion committed
28 29 30 31
    --prune				undo, i.e. delete from the database changes caused by running the script			



32
=head1 DESCRIPTION
33

34
This script calculates GC content per chromosomes.
35

36 37 38 39 40
The block size is determined so that you have 150 bins for the smallest
chromosome over 5 Mb in length. For chromosomes smaller than 5 Mb, an
additional smaller block size is used to yield 150 bins for the overall
smallest chromosome. This will result in reasonable resolution for small
chromosomes and high performance for big ones.
41

42
=head1 LICENCE
43

44 45
This code is distributed under an Apache style licence:
Please see http://www.ensembl.org/code_licence.html for details
46

47
=head1 AUTHOR
48

49 50 51 52 53 54 55
Patrick Meidl <pm2@sanger.ac.uk>

=head1 CONTACT

Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk

=cut
56

57 58 59 60 61 62 63 64 65 66 67
use strict;
use warnings;
no warnings 'uninitialized';

use FindBin qw($Bin);
use vars qw($SERVERROOT);

BEGIN {
    $SERVERROOT = "$Bin/../../..";
    unshift(@INC, "$SERVERROOT/ensembl/modules");
    unshift(@INC, "$SERVERROOT/bioperl-live");
68 69
}

70 71 72 73 74 75 76 77 78 79 80 81 82
use Getopt::Long;
use Pod::Usage;
use Bio::EnsEMBL::Utils::ConversionSupport;
use Bio::EnsEMBL::DensityType;
use Bio::EnsEMBL::DensityFeature;
use POSIX;

$| = 1;

my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);

# parse options
$support->parse_common_options(@_);
Steve Trevanion's avatar
Steve Trevanion committed
83 84 85
$support->parse_extra_options('prune');

$support->allowed_params($support->get_common_params, 'prune');
86 87 88 89

if ($support->param('help') or $support->error) {
    warn $support->error if $support->error;
    pod2usage(1);
90 91
}

92 93 94 95 96 97 98 99 100 101 102
# ask user to confirm parameters to proceed
$support->confirm_params;

# get log filehandle and print heading and parameters to logfile
$support->init_log;

# connect to database and get adaptors
my $dba = $support->get_database('ensembl');
my $dfa = $dba->get_DensityFeatureAdaptor;
my $dta = $dba->get_DensityTypeAdaptor;
my $aa = $dba->get_AnalysisAdaptor;
Steve Trevanion's avatar
Steve Trevanion committed
103 104 105 106
my $dbh= $dba->dbc->db_handle;

# check for prune option (undo)
if($support->param('prune')){
Steve Trevanion's avatar
Steve Trevanion committed
107
	exit unless ($support->user_proceed('About to delete all data from previous runs of this script. Proceed ?'));
Steve Trevanion's avatar
Steve Trevanion committed
108

Steve Trevanion's avatar
Steve Trevanion committed
109 110 111
	my $query= "delete analysis, density_type, density_feature from analysis left join density_type on  analysis.analysis_id= density_type.analysis_id left join density_feature on density_type.density_type_id= density_feature.density_type_id where analysis.program= 'percent_gc_calc.pl'";
	if(my $c = $dbh->do($query)){
		$support->log("prune was successfull: $c previous entries in the database generated by this script have been deleted\n");
Steve Trevanion's avatar
Steve Trevanion committed
112 113 114
	}
	else{
		$support->log_error("prune failed: any previous entries in the database generated by this script have NOT been deleted\n");
Steve Trevanion's avatar
Steve Trevanion committed
115
		exit;
Steve Trevanion's avatar
Steve Trevanion committed
116
	}
117

Steve Trevanion's avatar
Steve Trevanion committed
118 119 120 121 122 123 124
    # Create new Analysis object
    my $analysis = Bio::EnsEMBL::Analysis->new(
        -program     => "percent_gc_calc.pl",
        -database    => "ensembl",
        -gff_source  => "percent_gc_calc.pl",
        -gff_feature => "density",
        -logic_name  => "PercentGC",
125
    );
Steve Trevanion's avatar
Steve Trevanion committed
126
    $aa->store($analysis) unless ($support->param('dry_run'));
Steve Trevanion's avatar
Steve Trevanion committed
127

Steve Trevanion's avatar
Steve Trevanion committed
128 129
    # split chromosomes by size and determine block size
    my $chr_slices = $support->split_chromosomes_by_size(5000000);
Steve Trevanion's avatar
Steve Trevanion committed
130

Steve Trevanion's avatar
Steve Trevanion committed
131 132 133 134
    # loop over block sizes
    foreach my $block_size (keys %{ $chr_slices }) {
        $support->log("Available chromosomes using block size of $block_size:\n    ");
        $support->log(join("\n    ", map { $_->seq_region_name } @{ $chr_slices->{$block_size} })."\n");
Steve Trevanion's avatar
Steve Trevanion committed
135

Steve Trevanion's avatar
Steve Trevanion committed
136 137 138 139 140 141 142
        # create DensityType objects
        my $density_type = Bio::EnsEMBL::DensityType->new(
             -analysis   => $analysis,
             -block_size => $block_size,
             -value_type => 'ratio',
        );
        $dta->store($density_type) unless ($support->param('dry_run'));
Steve Trevanion's avatar
Steve Trevanion committed
143

Steve Trevanion's avatar
Steve Trevanion committed
144 145 146 147 148 149 150 151
        # loop over chromosomes
        $support->log_stamped("Looping over chromosomes...\n");
        my ($current_start, $current_end);
        foreach my $slice (@{ $chr_slices->{$block_size} }) {
            $current_start = 1;
            my $chr = $slice->seq_region_name;
            my $i;
            my $bins = POSIX::ceil($slice->end/$block_size);
Steve Trevanion's avatar
Steve Trevanion committed
152

Steve Trevanion's avatar
Steve Trevanion committed
153
            $support->log_stamped("Chromosome $chr with block size $block_size...\n", 1);
Steve Trevanion's avatar
Steve Trevanion committed
154

Steve Trevanion's avatar
Steve Trevanion committed
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
            # loop over blocks
            while($current_start <= $slice->end) {
                $i++;
                $current_end = $current_start + $block_size - 1;
                if ($current_end > $slice->end) {
                    $current_end = $slice->end;
                }
                my $sub_slice = $slice->sub_Slice( $current_start, $current_end );
                my $gc = $sub_slice->get_base_count->{'%gc'};
                $support->log_verbose("Chr: $chr | Bin: $i/$bins | \%GC: $gc\n", 2);
                my $df =  Bio::EnsEMBL::DensityFeature->new(
                    -seq_region    => $slice,
                    -start         => $current_start,
                    -end           => $current_end,
                    -density_type  => $density_type,
                    -density_value => $gc,
                );
                $dfa->store($df) unless ($support->param('dry_run'));
                $current_start = $current_end + 1;
174
            }
Steve Trevanion's avatar
Steve Trevanion committed
175
            $support->log_stamped("Done.\n", 1);
176
        }
Steve Trevanion's avatar
Steve Trevanion committed
177
        $support->log_stamped("Done.\n");
178 179
    }
}
180 181 182 183

# finish logfile
$support->finish_log;