Skip to content
Snippets Groups Projects
Commit f70524ae authored by Patrick Meidl's avatar Patrick Meidl
Browse files

changes from branch-vega-31-dev

parent 74260126
No related branches found
No related tags found
No related merge requests found
......@@ -461,11 +461,40 @@ sub get_all_homologous_Genes{
# Get/set the type of this gene.
#
# Arg [1]    : (optional) string - new biotype to store on the object
# Returns    : string
#                - for genes whose source is 'vega': the HAWK classification
#                  derived from "<biotype>.<confidence>"; when that pair has
#                  no HAWK equivalent a warning is issued and the plain
#                  biotype is returned instead
#                - for every other source: the stored biotype, defaulting to
#                  "protein_coding" when none is set
sub type {
    my $self = shift;
    $self->{'biotype'} = shift if (@_);

    # Fallback used for non-Vega genes and for unmappable Vega genes.
    my $plain_type = $self->{'biotype'} || "protein_coding";

    # Only Vega genes are translated; an undefined source counts as non-Vega.
    my $source = $self->source;
    return $plain_type unless (defined $source && $source eq 'vega');

    # map biotype/confidence to HAWK classification for Vega
    my %typemap = (
        'protein_coding.KNOWN'         => 'Known',
        'protein_coding.NOVEL'         => 'Novel_CDS',
        'unclassified.NOVEL'           => 'Novel_Transcript',
        'protein_coding.PREDICTED'     => 'Predicted_Gene',
        'unclassified.PUTATIVE'        => 'Putative',
        'pseudogene.KNOWN'             => 'Pseudogene',
        'pseudogene.NOVEL'             => 'Pseudogene',
        'processed_pseudogene.KNOWN'   => 'Processed_pseudogene',
        'processed_pseudogene.NOVEL'   => 'Processed_pseudogene',
        'unprocessed_pseudogene.KNOWN' => 'Unprocessed_pseudogene',
        'unprocessed_pseudogene.NOVEL' => 'Unprocessed_pseudogene',
        'Ig_pseudogene_segment.KNOWN'  => 'Ig_Pseudogene_Segment',
        'Ig_pseudogene_segment.NOVEL'  => 'Ig_Pseudogene_Segment',
        'Ig_segment.KNOWN'             => 'Ig_Segment',
        'Ig_segment.NOVEL'             => 'Ig_Segment',
    );

    my $bio_conf = $self->biotype . "." . $self->confidence;
    return $typemap{$bio_conf} if (exists $typemap{$bio_conf});

    # No HAWK equivalent: report it, but still hand back a usable type
    # rather than letting the sub fall through and return whatever
    # warning() happened to evaluate to.
    warning("biotype.confidence ($bio_conf) cannot be resolved to HAWK type");
    return $plain_type;
}
......
......@@ -21,14 +21,14 @@ ALTER TABLE transcript ADD description text;
UPDATE transcript t, xref x SET t.description = x.description WHERE t.display_xref_id = x.xref_id;
UPDATE transcript SET description=NULL WHERE description="";
# useful settings for the new tables
UPDATE gene SET biotype='protein_coding' WHERE biotype = 'ensembl';
UPDATE gene SET confidence='KNOWN' WHERE display_xref_id != 0;
UPDATE transcript SET confidence='KNOWN' WHERE display_xref_id != 0;
UPDATE gene SET source = 'ensembl';
UPDATE gene g, xref x, external_db ed SET g.confidence='KNOWN' WHERE g.display_xref_id = x.xref_id and x.external_db_id = ed.external_db_id and g.display_xref_id != 0 and ed.status like 'KNOWN%';
UPDATE transcript t, xref x, external_db ed SET t.confidence='KNOWN' WHERE t.display_xref_id = x.xref_id and x.external_db_id = ed.external_db_id and t.display_xref_id != 0 and ed.status like 'KNOWN%';
# some Vega-specific stuff; shouldn't harm anybody else
UPDATE gene SET biotype='unclassified' WHERE biotype = 'Transcript';
UPDATE gene SET biotype='pseudogene' WHERE biotype = 'Pseudogene';
UPDATE gene SET biotype='protein_coding', confidence='NOVEL' WHERE biotype = 'Novel_CDS';
......@@ -51,7 +51,6 @@ UPDATE transcript t, gene g SET t.biotype = g.biotype WHERE g.gene_id = t.gene_i
# new tables regulatory stuff and transcript supporting features
################################################################################
#
# Table structure for table 'regulatory_feature'
......
###################################################################
#FROM patch_23_24.sql
# FROM patch_23_24.sql
# Adds a display_label column to the prediction_transcript table and
# populates it.
......@@ -42,3 +42,60 @@ WHERE a.analysis_id = pt.analysis_id;
DROP TABLE prediction_transcript;
ALTER TABLE tmp_prediction_transcript RENAME prediction_transcript;
###################################################################
# FROM patch_30_31.sql
# Migrates the gene and transcript tables: renames gene.type to biotype, adds
# source/confidence/description columns, and converts the old type names into
# biotype/confidence pairs. Statement order matters; see the notes below.
# gene table changes
# rename the column 'type' to 'biotype' (default 'protein_coding')
ALTER TABLE gene CHANGE type biotype VARCHAR(40) NOT NULL default 'protein_coding';
ALTER TABLE gene ADD source VARCHAR(20) NOT NULL default 'ensembl';
ALTER TABLE gene ADD confidence ENUM( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ) default 'NOVEL';
ALTER TABLE gene ADD description text;
# copy the descriptions into the new gene.description column, then drop the
# gene_description table they came from
UPDATE gene g, gene_description gd SET g.description = gd.description WHERE gd.gene_id = g.gene_id;
DROP TABLE gene_description;
# transcript related changes
ALTER TABLE transcript ADD biotype VARCHAR(40) NOT NULL DEFAULT 'protein_coding';
ALTER TABLE transcript ADD confidence ENUM( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ) default 'NOVEL';
ALTER TABLE transcript ADD description text;
# reasonable default for transcript description
# it might be questionable whether a separate transcript description is necessary
UPDATE transcript t, xref x SET t.description = x.description WHERE t.display_xref_id = x.xref_id;
# store missing descriptions as NULL rather than as empty strings
UPDATE transcript SET description=NULL WHERE description="";
# useful settings for the new tables
# every gene is marked with source 'vega', replacing the 'ensembl' column default
UPDATE gene SET source = 'vega';
# a gene/transcript is KNOWN when its display xref belongs to an external
# database whose status starts with 'KNOWN'
UPDATE gene g, xref x, external_db ed SET g.confidence='KNOWN' WHERE g.display_xref_id = x.xref_id and x.external_db_id = ed.external_db_id and g.display_xref_id != 0 and ed.status like 'KNOWN%';
UPDATE transcript t, xref x, external_db ed SET t.confidence='KNOWN' WHERE t.display_xref_id = x.xref_id and x.external_db_id = ed.external_db_id and t.display_xref_id != 0 and ed.status like 'KNOWN%';
# some Vega-specific stuff; shouldn't harm anybody else
# translate the old type names into the new biotype (and, where given,
# confidence) values; these run after the xref-based KNOWN update above, so a
# confidence set here overrides it for the affected genes
UPDATE gene SET biotype='unclassified' WHERE biotype = 'Transcript';
UPDATE gene SET biotype='pseudogene' WHERE biotype = 'Pseudogene';
UPDATE gene SET biotype='protein_coding', confidence='NOVEL' WHERE biotype = 'Novel_CDS';
UPDATE gene SET biotype='unclassified', confidence='NOVEL' WHERE biotype = 'Novel_Transcript';
UPDATE gene SET biotype='unclassified',confidence='PUTATIVE' WHERE biotype = 'Putative';
UPDATE gene SET biotype='protein_coding', confidence='KNOWN' WHERE biotype = 'Known';
UPDATE gene SET biotype='processed_pseudogene' WHERE biotype = 'Processed_pseudogene';
UPDATE gene SET biotype='unprocessed_pseudogene' WHERE biotype = 'Unprocessed_pseudogene';
UPDATE gene SET biotype='protein_coding',confidence='PREDICTED' WHERE biotype = 'Predicted_Gene';
UPDATE gene SET biotype='Ig_segment' WHERE biotype = 'Ig_Segment';
UPDATE gene SET biotype='Ig_pseudogene_segment' WHERE biotype = 'Ig_Pseudogene_Segment';
# replace any hyphens in the biotype values with underscores
UPDATE gene SET biotype=replace( biotype, '-','_' );
# reasonable biotypes for the transcripts, take the one from the gene
# (must run last so the transcripts receive the already-converted gene biotypes)
UPDATE transcript t, gene g SET t.biotype = g.biotype WHERE g.gene_id = t.gene_id;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment