Skip to content
Snippets Groups Projects
Commit 2add40a4 authored by Simon Potter's avatar Simon Potter
Browse files

fixed for new style contigs IDs

moved sub's to bottom to make it readable
parent 1c48a97e
No related branches found
No related tags found
No related merge requests found
......@@ -20,89 +20,12 @@ print STDERR ( "Start reading contig.txt.\n" );
while( <CF> ) {
my ( $contig, $offset, $length ) = split;
( $clone ) = $contig =~ /^([^\.]+)\./;
( $clone ) = $contig =~ /^([^\.]+\.\d+)\./;
$clone_offset_hash{$clone}->{$contig} = [ $offset, $length ];
}
close CF;
print STDERR ( "Contig.txt read.\n" );
sub append_agp {
/\.agp$/ &&
push( @filelist, $File::Find::name );
}
sub read_agp {
$filename = shift;
local *FH;
open( FH, $filename ) or die( "Cant open file $filename." );
while( <FH> ) {
/^#/ && next;
@list = split( /\t/, $_ );
if( $list[4] =~ /[^N]/ ) {
$readLines++;
( $clone ) = $list[5] =~ /^([^\.]+)\./;
$start = $list[6];
$end = $list[7];
if( defined( $clone_offset_hash{$clone} )) {
$known_clone++;
check_line( $clone, $start, $end, $_ );
} else {
$unknown_clone++;
}
}
}
close FH;
}
sub check_line {
my ( $clone, $start, $end, $line ) = @_;
my $clonehashref = $clone_offset_hash{$clone};
my ( $offset, $length );
for my $contig ( keys %{$clonehashref} ) {
( $offset, $length ) = @{$clonehashref->{$contig}};
# match check
if( $start >= $offset && ($end <= ( $offset+$length-1))) {
if( defined $contigUsage{$contig} ) {
push( @{$contigUsage{$contig}}, $line );
$fragmented{$clone}->{$contig} = 1;
$double_used_contigs++;
} else {
push( @{$contigUsage{$contig}}, $line );
}
}
# partly match == mismatch
if((( $start < $offset ) && ( $end >= $offset )) ||
(( $start <= ($offset+$length-1)) && ( $end > ($offset+$length-1)))) {
push( @{$mismatched{$clone}}, $line );
$mismatched++;
return;
}
}
}
sub clone_hash_string {
my $clonename = shift;
my $hashref = $clone_offset_hash{$clonename};
my $result;
my @keylist_sorted;
$result .= "EnsEMBL Freeze Clone $clonename\n";
@keylist_sorted = sort { $hashref->{$a}[0] <=> $hashref->{$b}[0] } ( keys %{$hashref} );
for my $contig ( @keylist_sorted ) {
$result .= "$contig ";
my ( $offset, $length );
( $offset, $length ) = @{$hashref->{$contig}};
$result .= "$offset ".($offset+$length-1)."\n";
}
return $result;
}
find( \&append_agp, $agp_dir );
print STDERR ( "Reading ", $#filelist+1, " files.\n" );
......@@ -113,7 +36,6 @@ for $filename (@filelist) {
read_agp( $filename );
$count++;
if( $count % 100 == 0 ) {
# last;
print STDERR ( "$count read.\n" );
}
}
......@@ -161,5 +83,82 @@ $mismatched lines were mismatches.
$unknown_clone lines had unknown clones.
);
exit;
sub append_agp {
/\.agp$/ &&
push( @filelist, $File::Find::name );
}
sub read_agp {
$filename = shift;
local *FH;
open( FH, $filename ) or die( "Cant open file $filename." );
while( <FH> ) {
/^#/ && next;
@list = split( /\t/, $_ );
if( $list[4] =~ /[^N]/ ) {
$readLines++;
( $clone ) = $list[5] =~ /^([^\.]+\.\d+)$/;
$start = $list[6];
$end = $list[7];
if( defined( $clone_offset_hash{$clone} )) {
$known_clone++;
check_line( $clone, $start, $end, $_ );
} else {
$unknown_clone++;
print "Unknown $clone\n";
}
}
}
close FH;
}
sub clone_hash_string {
my $clonename = shift;
my $hashref = $clone_offset_hash{$clonename};
my $result;
my @keylist_sorted;
$result .= "EnsEMBL Freeze Clone $clonename\n";
@keylist_sorted = sort { $hashref->{$a}[0] <=> $hashref->{$b}[0] } ( keys %{$hashref} );
for my $contig ( @keylist_sorted ) {
$result .= "$contig ";
my ( $offset, $length );
( $offset, $length ) = @{$hashref->{$contig}};
$result .= "$offset ".($offset+$length-1)."\n";
}
return $result;
}
sub check_line {
my ( $clone, $start, $end, $line ) = @_;
my $clonehashref = $clone_offset_hash{$clone};
my ( $offset, $length );
for my $contig ( keys %{$clonehashref} ) {
( $offset, $length ) = @{$clonehashref->{$contig}};
# match check
if( $start >= $offset && ($end <= ( $offset+$length-1))) {
if( defined $contigUsage{$contig} ) {
push( @{$contigUsage{$contig}}, $line );
$fragmented{$clone}->{$contig} = 1;
$double_used_contigs++;
} else {
push( @{$contigUsage{$contig}}, $line );
}
}
# partly match == mismatch
if((( $start < $offset ) && ( $end >= $offset )) ||
(( $start <= ($offset+$length-1)) && ( $end > ($offset+$length-1)))) {
push( @{$mismatched{$clone}}, $line );
$mismatched++;
return;
}
}
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment