Skip to content
Snippets Groups Projects
Commit 30adda6b authored by Patrick Meidl's avatar Patrick Meidl
Browse files

some fixes for and notes about PAR glitches/gotchas

parent 37baabf6
No related branches found
No related tags found
No related merge requests found
......@@ -142,6 +142,8 @@ while (<FILE>) {
my $proj_slice = $segments[0]->to_Slice;
next unless ($feat->length == $proj_slice->length);
next unless ($proj_slice->seq_region_name eq $feat->slice->seq_region_name);
# everything looks fine, so adjust the coords of your feature
$feat->start($proj_slice->start);
$feat->end($proj_slice->end);
......
......@@ -138,6 +138,24 @@ Scripts and main modules used
ensembl/misc-scripts/cleanup_tmp_tables.pl
-------------------------------------------------------------------------------
Known bugs
-------------------------------------------------------------------------------
1. PAR regions:
===============
When projecting from a symlinked region of a PAR, you will end up in the
reference region (e.g. projecting from human chromosome Y gets you to X). The
coordinates of the projected slice will therefor also reflect the reference
region and might not be what you expect. Since features should only be stored
once (on the reference region) anyway, these projections should be ignored.
If you find other bugs please report them to the Ensembl development mailing
list <ensembl-dev@ebi.ac.uk> or to Patrick Meidl <meidl@ebi.ac.uk>.
-------------------------------------------------------------------------------
Further documentation
-------------------------------------------------------------------------------
......
......@@ -166,6 +166,10 @@ foreach my $chr ($support->sort_chromosomes) {
# alternative sequence
my $A_proj_slice = $seg->to_Slice;
# ignore PAR region (i.e. we project to the symlinked seq_region)
next if ($A_proj_slice->seq_region_name ne $chr);
my $A_sub_slice = $A_slice->sub_Slice($A_proj_slice->start, $A_proj_slice->end, $A_proj_slice->strand);
my $A_seq = $A_sub_slice->seq;
......
......@@ -183,8 +183,15 @@ foreach my $chr ($support->sort_chromosomes) {
my $R_slice = $R_sa->fetch_by_region('chromosome', $chr);
my @segments = @{ $R_slice->project('chromosome', $support->param('altassembly')) };
my $alignments = 0;
foreach my $seg (@segments) {
my $l = $seg->to_Slice->length;
my $sl = $seg->to_Slice;
# ignore PAR region (i.e. we project to the symlinked seq_region)
next if ($sl->seq_region_name ne $chr);
my $l = $sl->length;
$mapping_length += $l;
my $c_oom = $oom;
......@@ -194,6 +201,8 @@ foreach my $chr ($support->sort_chromosomes) {
$c_oom--;
}
$blocks{10}++ if ($l == 1);
$alignments++;
}
# print stats
......@@ -212,15 +221,13 @@ foreach my $chr ($support->sort_chromosomes) {
$support->log("\n");
my $segs = scalar(@segments);
$support->log(sprintf($fmt1, "Total alignments:", $segs), 2);
$support->log(sprintf($fmt1, "Total alignments:", $alignments), 2);
if ($segs) {
if ($alignments) {
for (my $i = 0; $i < $oom; $i++) {
my $from = 10**$i;
my $to = 10**($i+1);
$support->log(sprintf($fmt3, " ".$support->commify($from)." - ".$support->commify($to)." bp:", $blocks{$to}, $blocks{$to}/$segs*100), 2);
$support->log(sprintf($fmt3, " ".$support->commify($from)." - ".$support->commify($to)." bp:", $blocks{$to}, $blocks{$to}/$alignments*100), 2);
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment