Skip to content
Snippets Groups Projects
Commit 2c378673 authored by Glenn Proctor's avatar Glenn Proctor
Browse files

Added removal of promiscuous probesets

parent 2191798c
No related branches found
No related tags found
No related merge requests found
......@@ -71,7 +71,7 @@ my %exonic_probesets;
my %utr_probesets;
my %intronic_probesets;
my %promiscuous_probes;
my %promiscuous_probesets;
my %dbentries_per_probeset;
my $i = 0;
......@@ -102,6 +102,9 @@ foreach my $transcript (@{$transcript_adaptor->fetch_all()}) {
foreach my $feature (@$oligo_features) {
my $probeset = $feature->probeset();
next if ($promiscuous_probesets{$probeset});
my $probe = $feature->probe();
my $probe_length = $probe->probelength();
my $min_overlap = ($probe_length - $max_mismatches);
......@@ -206,40 +209,37 @@ sub add_xref {
# $dbe->dbID:$transcript->dbID
push @{$dbentries_per_probeset{$probeset}}, $dbe->dbID() . ":" . $transcript->dbID();
# if any probesets map to more than 100 transcripts, ignore them in future and
# delete existing mappings to them
if (scalar(@{$dbentries_per_probeset{$probeset}}) > $max_probesets_per_transcript) {
$promiscuous_probesets{$probeset} = $probeset;
remove_probeset_transcript_mappings($t_db, $probeset);
}
}
}
# ----------------------------------------------------------------------
# Remove mappings for probesets that map to more than 100 transcripts
# Remove mappings for a particular probeset
# TODO This is horribly inefficent, potentially thousands of unnecessary db calls
sub remove_probeset_transcript_mappings {
sub prune_promiscuous_probesets {
my ($dba, $probeset) = @_;
my ($dba) = @_;
print "Removing probesets that map to more than $max_probesets_per_transcript transcripts\n";
print "Removing probeset $probeset\n";
my $p = 0;
my $sth = $dba->dbc()->prepare("DELETE FROM object_xref WHERE xref_id=? AND ensembl_object_type='Transcript' AND ensembl_id=?");
foreach my $probeset (keys %dbentries_per_probeset) {
my $values = @{$dbentries_per_probeset{$probeset}};
my $values = @{$dbentries_per_probeset{$probeset}};
if ($values > $max_probesets_per_transcript) {
foreach my $value (@{$dbentries_per_probeset{$probeset}}) {
foreach my $value (@{$dbentries_per_probeset{$probeset}}) {
my ($dbe_id, $transcript_id) = split(/:/, $value);
my ($dbe_id, $transcript_id) = split(/:/, $value);
$sth->execute($dbe_id, $transcript_id);
$p++;
}
}
$sth->execute($dbe_id, $transcript_id);
$p++;
}
......@@ -247,8 +247,6 @@ sub prune_promiscuous_probesets {
print "Removed $p probeset-transcript mappings\n";
# TODO - remove any xrefs that have 0 object_xrefs?
}
# ----------------------------------------------------------------------
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment