From 38f511961b64e4ee1bc215e3b034bbb7ae542473 Mon Sep 17 00:00:00 2001 From: Monika Komorowska <mk8@sanger.ac.uk> Date: Mon, 9 Jan 2012 15:15:34 +0000 Subject: [PATCH] Don't remove core data if status is moved back to mapping_finished. Changes to alt_alleles: use the first non-reference gene if there is no reference gene in an alt_allele. --- .../xref_mapping/XrefMapper/BasicMapper.pm | 102 ++++++++---------- 1 file changed, 46 insertions(+), 56 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm index 51427d8ede..f1f37b69a1 100644 --- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm @@ -533,13 +533,16 @@ sub get_alt_alleles { my $gene_id; my $alt_id; - my $sth = $self->core->dbc->prepare("select alt_allele_id, gene_id from alt_allele"); + my $is_ref; + my $sth = $self->core->dbc->prepare("select alt_allele_id, gene_id, is_ref from alt_allele"); $sth->execute; - $sth->bind_columns(\$alt_id,\$gene_id); + $sth->bind_columns(\$alt_id,\$gene_id, \$is_ref); my $count = 0 ; my %alt_id_to_gene_id; my %gene_id_to_alt_id; my $max_alt_id = 0; + my %is_reference; + while($sth->fetch){ $count++; push @{$alt_id_to_gene_id{$alt_id}}, $gene_id; @@ -547,52 +550,36 @@ sub get_alt_alleles { if($alt_id > $max_alt_id){ $max_alt_id = $alt_id; } + if ($is_ref) { + $is_reference{$gene_id} = 1; + } } my $insert_sth = $self->xref->dbc->prepare("insert into alt_allele (alt_allele_id, gene_id, is_reference) values (?, ?,?)"); - - if($count){ - my %non_reference; - - my $sql = (<<"SEQ"); -SELECT g.gene_id - FROM gene g, seq_region_attrib sra, attrib_type at - WHERE g.seq_region_id = sra.seq_region_id AND - at.attrib_type_id = sra.attrib_type_id AND - at.code = 'non_ref' -SEQ - - $sth = $self->core->dbc->prepare($sql); - $sth->execute; - $sth->bind_columns(\$gene_id); - while($sth->fetch()){ - $non_reference{$gene_id} = 1; - } - + $sth = $self->xref->dbc->prepare("delete from alt_allele"); $sth->execute; - my $alt_added = 0; my $num_of_genes = 0; my $alt_failed = 0; foreach my $alt_id (keys %alt_id_to_gene_id){ - # make sure one and only one is on the reference + # make sure only one gene or none are on the reference my $ref_count = 0; foreach my $gene (@{$alt_id_to_gene_id{$alt_id}}){ - if(!defined($non_reference{$gene})){ + if(defined($is_reference{$gene})){ $ref_count++; } } - if($ref_count == 1){ + if($ref_count == 1 || $ref_count == 0){ $alt_added++; foreach my $gene (@{$alt_id_to_gene_id{$alt_id}}){ $num_of_genes++; my $ref =0 ; - if(!defined($non_reference{$gene})){ + if(defined($is_reference{$gene})){ $ref = 1; } $insert_sth->execute($alt_id, $gene, $ref); @@ -894,7 +881,7 @@ sub get_species_id_from_species_name{ sub clean_up{ my $self = shift; my $stats = shift; - + my $keep_core_data = shift; # remove all object_xref, identity_xref entries @@ -926,27 +913,27 @@ sub clean_up{ $sth->execute(); - - # remove all from core_info tables - # gene_transcript_translation - # [gene/transcript/translation]_stable_id - # - $sql = "DELETE from gene_transcript_translation"; - $sth = $self->xref->dbc->prepare($sql); - $sth->execute(); + if (!$keep_core_data) { + # remove all from core_info tables + # gene_transcript_translation + # [gene/transcript/translation]_stable_id + # + $sql = "DELETE from gene_transcript_translation"; + $sth = $self->xref->dbc->prepare($sql); + $sth->execute(); - $sql = "DELETE from gene_stable_id"; - $sth = $self->xref->dbc->prepare($sql); - $sth->execute(); + $sql = "DELETE from gene_stable_id"; + $sth = $self->xref->dbc->prepare($sql); + $sth->execute(); - $sql = "DELETE from transcript_stable_id"; - $sth = $self->xref->dbc->prepare($sql); - $sth->execute(); - - $sql = "DELETE from translation_stable_id"; - $sth = $self->xref->dbc->prepare($sql); - $sth->execute(); + $sql = "DELETE from transcript_stable_id"; + $sth = $self->xref->dbc->prepare($sql); + $sth->execute(); + $sql = "DELETE from translation_stable_id"; + $sth = $self->xref->dbc->prepare($sql); + $sth->execute(); + } return; } @@ -984,7 +971,8 @@ sub revert_to_parsing_finished{ sub revert_to_mapping_finished{ my $self = shift; - $self->clean_up(); + + $self->clean_up(undef,1); # set mapping jobs to SUBMITTED my $sql = 'UPDATE mapping_jobs set status = "SUBMITTED"';; @@ -1006,21 +994,23 @@ sub get_alt_allele_hashes{ my %alt_to_ref; my %ref_to_alts; - my $sql = "select gene_id, is_reference from alt_allele order by alt_allele_id, is_reference DESC"; + my $sql = "select alt_allele_id, gene_id, is_reference from alt_allele order by alt_allele_id, is_reference DESC"; my $sth = $self->xref->dbc->prepare($sql); $sth->execute(); - my ($gene_id, $is_ref); - $sth->bind_columns(\$gene_id, \$is_ref); + my ($alt_allele_id,$gene_id, $is_ref); + $sth->bind_columns(\$alt_allele_id, \$gene_id, \$is_ref); + my $last_alt_allele = 0; my $ref_gene; while($sth->fetch()){ - if($is_ref){ - $ref_gene = $gene_id; - } - else{ - $alt_to_ref{$gene_id} = $ref_gene; - push @{$ref_to_alts{$ref_gene}}, $gene_id; - } + if( $alt_allele_id != $last_alt_allele) { + #use the first non-reference gene if there is no reference gene in an alt_allele + $ref_gene = $gene_id; + } else{ + $alt_to_ref{$gene_id} = $ref_gene; + push @{$ref_to_alts{$ref_gene}}, $gene_id; + } + $last_alt_allele = $alt_allele_id; } $sth->finish; -- GitLab