diff --git a/misc-scripts/overlapping_regions.pl b/misc-scripts/overlapping_regions.pl deleted file mode 100644 index 9b678daab0bbe4e485f1f8a44f34211e2f270e5c..0000000000000000000000000000000000000000 --- a/misc-scripts/overlapping_regions.pl +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env perl -# Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -=head1 NAME - -overlapping_regions.pl - calculates overlapping regions in the assembly table - -=head1 SYNOPSIS - -overlapping_regions.pl [arguments] - -Required arguments: - - --host, --dbhost, --db_host=HOST database host HOST - --port, --dbport, --db_port=PORT database port PORT - --user, --dbuser, --db_user=USER database username USER - --pass, --dbpass, --db_pass=PASS database passwort PASS - - --pattern, --dbpattern=PATTERN patch databases where name matches PATTERN - Note that this is a database pattern of - the form %core_41% rather than a regexp - -Optional arguments: - - -n, --dry, --dry_run don't write results to database - -h, --help, -? print help (this message) - -Please note that where an argument expects a value, this is true for all -alternative argument styles. - -=head1 DESCRIPTION - -This is a script that will calculate if an assembly has overlapping regions -in same coordinate system (e.g. 2 chr_1 stored in the assembly table, one as -the current, the other as an old version). It will store a new entry in the meta -table. If there are overlapping regions, can cause problems when moving features -between coordinate systems. The pattern is a string that can be used in an -'IN' clause in SQL (e.g. "%_core_%"; if you want to patch only a single -database, use a pattern without % expansion, e.g. "homo_sapiens_core_38_36"). - -=head1 EXAMPLES - - perl overlapping_regions.pl -host ens-staging -user ensadmin -pass xxxx - -pattern "%core_55_%" -port 3306 -dry_run 0 - - -=head1 AUTHOR - -Daniel Rios <dani@ebi.ac.uk>, Ensembl core API team - -=head1 CONTACT - -Please post comments/questions to the Ensembl development list -<http://lists.ensembl.org/mailman/listinfo/dev> - -=cut - - -use strict; -use warnings; -no warnings 'uninitialized'; -use FindBin qw($Bin); -use vars qw($SERVERROOT); - -BEGIN { - $SERVERROOT = "$Bin/../.."; - unshift(@INC, "$SERVERROOT/ensembl/modules"); -} - -use Getopt::Long; -use Pod::Usage; -use Bio::EnsEMBL::Utils::ConversionSupport; -use DBI; - -$| = 1; - -my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT); - -# parse options -$support->parse_common_options(@_); -$support->parse_extra_options( - 'pattern|dbpattern=s' - ); -my @params = map { $_ unless ($_ =~ /dbname/) } $support->get_common_params; -$support->allowed_params( - @params, - 'pattern' - ); - -if ($support->param('help') or $support->error) { - warn $support->error if $support->error; - pod2usage(1); -} - -# ask user to confirm parameters to proceed -$support->confirm_params; - -# get log filehandle and print heading and parameters to logfile -$support->init_log; - -$support->check_required_params( - 'pattern' -); - -# connect to database -my $dbh = $support->get_dbconnection; - -# get all database names that match pattern -my ($sth, $sql); -$sql = "SHOW DATABASES LIKE '".$support->param('pattern')."'"; -$sth = $dbh->prepare($sql); -$sth->execute; - -if ($support->param('dry_run')) { - $support->log_stamped("Dry run. Results will not be written to the database.\n\n"); -} -# loop over databases -while (my ($dbname) = $sth->fetchrow_array) { - $support->log_stamped("$dbname\n"); - - if ($support->user_proceed("\nCalculate overlapping regions for $dbname?")) { - #this query will detect if there are overlapping regions in same coord_system - #starting in position 1. - $dbh->do("use $dbname;"); - $sql = qq(SELECT count(*) FROM assembly as1, seq_region s1, assembly as2, seq_region s2 - WHERE as1.asm_seq_region_id = s1.seq_region_id AND as2.asm_seq_region_id = s2.seq_region_id - AND s1.coord_system_id = s2.coord_system_id AND as1.asm_start = as2.asm_start - AND as1.asm_end = as2.asm_end AND as1.cmp_seq_region_id = as2.cmp_seq_region_id - AND as1.asm_seq_region_id <> as2.asm_seq_region_id AND as1.asm_start = 1); - my $sth1 = $dbh->prepare($sql); - $sth1->execute; - my $val = $sth1->fetch(); - my $overlaps; - #let's see if we write it in the database - if ($val->[0] > 0){ - $support->log("\tDatabase has overlapping regions. This might make give wrong results when mapping features\n"); - $overlaps = 'true'; - } - else{ - $support->log("\tDatabase looks correct\n"); - $overlaps = 'false'; - } - if (!$support->param('dry_run')){ - #let's write results to the database. First remove previous entry - $sql = qq(DELETE FROM meta where meta_key = 'assembly.overlapping_regions'); - $dbh->do($sql); - #and now will insert it - $sql = qq(INSERT INTO meta (species_id,meta_key,meta_value) VALUES (1,'assembly.overlapping_regions',$overlaps)); - $dbh->do($sql); - } - $support->log("Done\n"); - } - -} -$support->finish_log;