Skip to content
Snippets Groups Projects
Commit 998c9df9 authored by Patrick Meidl's avatar Patrick Meidl
Browse files

changes from latest run (mouse NCBIM36 to 37):

  - allow mismatches
  - fixes to cope with changes to ConversionSupport->sort_chromosomes()
  - allow most scripts (with the exception of load_alternative_assembly.pl) to
    use source and target dbs on different hosts
  - various other improvements
parent ea268756
No related branches found
No related tags found
No related merge requests found
...@@ -279,7 +279,9 @@ sub find_best_alignment { ...@@ -279,7 +279,9 @@ sub find_best_alignment {
Example : $aligner->parse_blastz_output; Example : $aligner->parse_blastz_output;
Description : Reads a blastz alignment result from an axt file and creates Description : Reads a blastz alignment result from an axt file and creates
a datastructure containing ungapped alignments from it. a datastructure containing ungapped alignments from it. Note
that mismatches are allowed, but separate stats will be
collected for them.
Return type : none Return type : none
Exceptions : none Exceptions : none
Caller : general Caller : general
...@@ -329,15 +331,15 @@ sub parse_blastz_output { ...@@ -329,15 +331,15 @@ sub parse_blastz_output {
$self->stats_incr('R_gap', 1) if ($R_arr[$j] eq '-'); $self->stats_incr('R_gap', 1) if ($R_arr[$j] eq '-');
$match_flag = 0; $match_flag = 0;
} else { } else {
$self->found_match($match_flag, $j, \%coords);
$match_flag = 1;
# match # match
if ($A_arr[$j] eq $R_arr[$j]) { if ($A_arr[$j] eq $R_arr[$j]) {
$self->found_match($match_flag, $j, \%coords);
$self->stats_incr('match', 1); $self->stats_incr('match', 1);
$match_flag = 1;
# mismatch # mismatch
} else { } else {
$self->stats_incr('mismatch', 1); $self->stats_incr('mismatch', 1);
$match_flag = 0;
} }
} }
} }
......
...@@ -90,6 +90,9 @@ chromosome at a time over lsf: ...@@ -90,6 +90,9 @@ chromosome at a time over lsf:
$ ensembl/misc-scripts/assembly/align_nonident_regions_wrapper.pl $ ensembl/misc-scripts/assembly/align_nonident_regions_wrapper.pl
After running this, check all logfiles for errors (also the lsf logs in the lsf/
subdirectory of your logpath).
4. Remove overlaps: 4. Remove overlaps:
=================== ===================
......
...@@ -120,6 +120,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT); ...@@ -120,6 +120,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
$support->parse_common_options(@_); $support->parse_common_options(@_);
$support->parse_extra_options( $support->parse_extra_options(
'assembly=s', 'assembly=s',
'althost=s',
'altport=i',
'altuser=s',
'altpass=s',
'altdbname=s', 'altdbname=s',
'altassembly=s', 'altassembly=s',
'chromosomes|chr=s@', 'chromosomes|chr=s@',
...@@ -128,6 +132,10 @@ $support->parse_extra_options( ...@@ -128,6 +132,10 @@ $support->parse_extra_options(
$support->allowed_params( $support->allowed_params(
$support->get_common_params, $support->get_common_params,
'assembly', 'assembly',
'althost',
'altport',
'altuser',
'altpass',
'altdbname', 'altdbname',
'altassembly', 'altassembly',
'chromosomes', 'chromosomes',
...@@ -167,10 +175,9 @@ if ($support->param('verbose') and $support->user_proceed($txt)) { ...@@ -167,10 +175,9 @@ if ($support->param('verbose') and $support->user_proceed($txt)) {
# #
my ($dba, $dbh, $sql, $sth); my ($dba, $dbh, $sql, $sth);
# first set connection parameters for alternative db # first set connection parameters for alternative db if not different from
# both databases have to be on the same host, so we don't need to configure # reference db
# them separately map { $support->param("alt$_", $support->param($_)) unless ($support->param("alt$_")) } qw(host port user);
map { $support->param("alt$_", $support->param($_)) } qw(host port user pass);
# reference database # reference database
my $R_dba = $support->get_database('ensembl'); my $R_dba = $support->get_database('ensembl');
...@@ -206,8 +213,8 @@ unless ($support->param('dry_run')) { ...@@ -206,8 +213,8 @@ unless ($support->param('dry_run')) {
##### #####
# get reference and alternative chromosomes # get reference and alternative chromosomes
# #
my $R_chrlength = $support->get_chrlength($R_dba, $support->param('assembly')); my $R_chrlength = $support->get_chrlength($R_dba, $support->param('assembly'), 'chromosome');
my $A_chrlength = $support->get_chrlength($R_dba, $support->param('altassembly')); my $A_chrlength = $support->get_chrlength($R_dba, $support->param('altassembly'), 'chromosome');
my $ref_chr_map = $support->get_ensembl_chr_mapping($R_dba, $support->param('assembly')); my $ref_chr_map = $support->get_ensembl_chr_mapping($R_dba, $support->param('assembly'));
##### #####
......
...@@ -119,6 +119,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT); ...@@ -119,6 +119,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
$support->parse_common_options(@_); $support->parse_common_options(@_);
$support->parse_extra_options( $support->parse_extra_options(
'assembly=s', 'assembly=s',
'althost=s',
'altport=i',
'altuser=s',
'altpass=s',
'altdbname=s', 'altdbname=s',
'altassembly=s', 'altassembly=s',
'bindir=s', 'bindir=s',
...@@ -128,6 +132,10 @@ $support->parse_extra_options( ...@@ -128,6 +132,10 @@ $support->parse_extra_options(
$support->allowed_params( $support->allowed_params(
$support->get_common_params, $support->get_common_params,
'assembly', 'assembly',
'althost',
'altport',
'altuser',
'altpass',
'altdbname', 'altdbname',
'altassembly', 'altassembly',
'bindir', 'bindir',
...@@ -157,10 +165,9 @@ $support->check_required_params( ...@@ -157,10 +165,9 @@ $support->check_required_params(
# #
my ($dba, $dbh, $sql, $sth); my ($dba, $dbh, $sql, $sth);
# first set connection parameters for alternative db # first set connection parameters for alternative db if not different from
# both databases have to be on the same host, so we don't need to configure # reference db
# them separately map { $support->param("alt$_", $support->param($_)) unless ($support->param("alt$_")) } qw(host port user);
map { $support->param("alt$_", $support->param($_)) } qw(host port user pass);
# reference database # reference database
my $R_dba = $support->get_database('ensembl'); my $R_dba = $support->get_database('ensembl');
......
...@@ -97,6 +97,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT); ...@@ -97,6 +97,10 @@ my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
$support->parse_common_options(@_); $support->parse_common_options(@_);
$support->parse_extra_options( $support->parse_extra_options(
'assembly=s', 'assembly=s',
'althost=s',
'altport=i',
'altuser=s',
'altpass=s',
'altdbname=s', 'altdbname=s',
'altassembly=s', 'altassembly=s',
'bindir=s', 'bindir=s',
...@@ -106,6 +110,10 @@ $support->parse_extra_options( ...@@ -106,6 +110,10 @@ $support->parse_extra_options(
$support->allowed_params( $support->allowed_params(
$support->get_common_params, $support->get_common_params,
'assembly', 'assembly',
'althost',
'altport',
'altuser',
'altpass',
'altdbname', 'altdbname',
'altassembly', 'altassembly',
'bindir', 'bindir',
...@@ -140,7 +148,7 @@ my $R_dba = $support->get_database('ensembl'); ...@@ -140,7 +148,7 @@ my $R_dba = $support->get_database('ensembl');
# loop over chromosomes # loop over chromosomes
$support->log_stamped("Looping over chromosomes...\n"); $support->log_stamped("Looping over chromosomes...\n");
foreach my $chr ($support->sort_chromosomes) { foreach my $chr ($support->sort_chromosomes($support->get_chrlength(undef, undef, 'chromosome'))) {
$support->log_stamped("Chromosome $chr...\n", 1); $support->log_stamped("Chromosome $chr...\n", 1);
# run align_nonident_regions.pl via lsf # run align_nonident_regions.pl via lsf
......
...@@ -148,7 +148,7 @@ my $A_sa = $A_dba->get_SliceAdaptor; ...@@ -148,7 +148,7 @@ my $A_sa = $A_dba->get_SliceAdaptor;
$support->log("Looping over chromosomes...\n\n"); $support->log("Looping over chromosomes...\n\n");
foreach my $chr ($support->sort_chromosomes) { foreach my $chr ($support->sort_chromosomes($support->get_chrlength(undef, undef, 'chromosome'))) {
$support->log_stamped("Chromosome $chr...\n", 1); $support->log_stamped("Chromosome $chr...\n", 1);
my $R_slice = $R_sa->fetch_by_region('chromosome', $chr); my $R_slice = $R_sa->fetch_by_region('chromosome', $chr);
...@@ -158,6 +158,7 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -158,6 +158,7 @@ foreach my $chr ($support->sort_chromosomes) {
my @segments = @{ $R_slice->project('chromosome', $support->param('altassembly')) }; my @segments = @{ $R_slice->project('chromosome', $support->param('altassembly')) };
my $i; my $i;
my $k;
foreach my $seg (@segments) { foreach my $seg (@segments) {
# reference sequence # reference sequence
...@@ -180,7 +181,7 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -180,7 +181,7 @@ foreach my $chr ($support->sort_chromosomes) {
} else { } else {
# not identical -> something is wrong # not identical -> something is wrong
$support->log_warning("Sequence mismatch at ".$R_sub_slice->name."\n", 2); $support->log("Sequence mismatch at ".$R_sub_slice->name."\n", 2);
my $R_sub_seq; my $R_sub_seq;
my $A_sub_seq; my $A_sub_seq;
...@@ -194,16 +195,18 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -194,16 +195,18 @@ foreach my $chr ($support->sort_chromosomes) {
} }
$support->log("Ref: $R_sub_seq\n", 3); $support->log("Ref: $R_sub_seq\n", 3);
$support->log("Alt: $A_sub_seq\n", 3); $support->log("Alt: $A_sub_seq\n\n", 3);
$i++; $i++;
} }
$k++;
} }
if ($i) { if ($i) {
$support->log("Total: $i problematic alignments.\n", 2); $support->log("Total: $i (of $k) alignments contain sequence mismatches.\n", 2);
} else { } else {
$support->log("All alignments ok.\n", 2); $support->log("All $k alignments ok.\n", 2);
} }
$support->log_stamped("Done.\n\n", 1); $support->log_stamped("Done.\n\n", 1);
......
...@@ -151,7 +151,7 @@ my %stats_total; ...@@ -151,7 +151,7 @@ my %stats_total;
$support->log_stamped("Looping over chromosomes...\n"); $support->log_stamped("Looping over chromosomes...\n");
foreach my $R_chr ($support->sort_chromosomes) { foreach my $R_chr ($support->sort_chromosomes($support->get_chrlength(undef, undef, 'chromosome'))) {
$support->log_stamped("\nChromosome $R_chr...\n", 1); $support->log_stamped("\nChromosome $R_chr...\n", 1);
# fetch chromosome slice and project to clones # fetch chromosome slice and project to clones
......
...@@ -142,7 +142,7 @@ my $sth = $dbh->prepare($sql); ...@@ -142,7 +142,7 @@ my $sth = $dbh->prepare($sql);
my $fmt1 = "%10s %10s %10s %10s %3s\n"; my $fmt1 = "%10s %10s %10s %10s %3s\n";
my @rows = (); my @rows = ();
foreach my $chr ($support->sort_chromosomes) { foreach my $chr ($support->sort_chromosomes($support->get_chrlength(undef, undef, 'chromosome'))) {
$support->log_stamped("\nChromosome $chr...\n"); $support->log_stamped("\nChromosome $chr...\n");
$sth->execute($chr); $sth->execute($chr);
......
...@@ -151,7 +151,7 @@ my $fmt3 = "%-44s%8.0f (%2.0f%%)\n"; ...@@ -151,7 +151,7 @@ my $fmt3 = "%-44s%8.0f (%2.0f%%)\n";
$support->log("Looping over chromosomes...\n\n"); $support->log("Looping over chromosomes...\n\n");
foreach my $chr ($support->sort_chromosomes) { foreach my $chr ($support->sort_chromosomes($support->get_chrlength(undef, undef, 'chromosome'))) {
$support->log_stamped("Chromosome $chr...\n", 1); $support->log_stamped("Chromosome $chr...\n", 1);
# determine non-N sequence length of alternative chromosome # determine non-N sequence length of alternative chromosome
...@@ -176,6 +176,7 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -176,6 +176,7 @@ foreach my $chr ($support->sort_chromosomes) {
# determine total length of mapping to alternative assembly # determine total length of mapping to alternative assembly
my $mapping_length = 0; my $mapping_length = 0;
my %blocks; my %blocks;
my %blocklength;
# chromosome length order of magnitude # chromosome length order of magnitude
my $oom = length($A_length); my $oom = length($A_length);
...@@ -197,7 +198,10 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -197,7 +198,10 @@ foreach my $chr ($support->sort_chromosomes) {
my $c_oom = $oom; my $c_oom = $oom;
while ($c_oom) { while ($c_oom) {
if ($l > 10**($c_oom-1) and $l <= 10**$c_oom) { $blocks{10**$c_oom}++; } if ($l > 10**($c_oom-1) and $l <= 10**$c_oom) {
$blocks{10**$c_oom}++;
$blocklength{10**$c_oom} += $l;
}
$c_oom--; $c_oom--;
} }
$blocks{10}++ if ($l == 1); $blocks{10}++ if ($l == 1);
...@@ -227,7 +231,7 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -227,7 +231,7 @@ foreach my $chr ($support->sort_chromosomes) {
for (my $i = 0; $i < $oom; $i++) { for (my $i = 0; $i < $oom; $i++) {
my $from = 10**$i; my $from = 10**$i;
my $to = 10**($i+1); my $to = 10**($i+1);
$support->log(sprintf($fmt3, " ".$support->commify($from)." - ".$support->commify($to)." bp:", $blocks{$to}, $blocks{$to}/$alignments*100), 2); $support->log(sprintf($fmt3, " ".$support->commify($from)." - ".$support->commify($to)." bp:", $blocks{$to}, $blocklength{$to}/$R_slice->length*100), 2);
} }
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment