Commit 0fe32d38 authored by Andy Yates's avatar Andy Yates
Browse files

Attempting to fix the issues with too many KNOWN status genes appearing in...

Attempting to fix the issues with too many KNOWN status genes appearing in species we should not be seeing them in. UniProtParser now limits itself to using ACs with an evidence level of 1 or 2. We no longer assign KNOWN based upon a dependent xref when we did not trust the parent xref. The loader & SQL have also been edited to allow for LOWEVIDENCE types of sources.
parent a5643444
......@@ -71,8 +71,8 @@ sub set_status_for_source_from_core{
my %external_name_to_status;
my $sth = $self->core->dbc->prepare('select db_name, status from external_db where status like "KNOWN%"');
$sth->execute();
my $sth = $self->core->dbc->prepare('select db_name, status from external_db where status like ?');
$sth->execute('KNOWN%');
my ($name, $status, $id);
$sth->bind_columns(\$name,\$status);
while($sth->fetch()){
......@@ -80,18 +80,16 @@ sub set_status_for_source_from_core{
}
$sth->finish;
my $sth_up = $self->xref->dbc->prepare("update source set status = ? where source_id = ?");
my $sth_up = $self->xref->dbc->prepare("update source set status = 'KNOWN' where source_id = ?");
my $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest
my $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id and s.status =? group by s.source_id'; # only get those of interest
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->execute('NOIDEA');
$sth->bind_columns(\$id, \$name);
while($sth->fetch()){
if(defined($external_name_to_status{$name})){
# set status
$sth_up->execute($id);
$sth_up->execute('KNOWN', $id);
}
}
$sth->finish;
......
......@@ -359,6 +359,7 @@ from ( source s
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
AND s.status like "KNOWN%"
AND ox.linkage_type <> 'DEPENDENT'
ORDER BY gene_id DESC, transcript_id DESC
DXS
......@@ -1393,6 +1394,8 @@ from ( display_xref_prioritys p
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
AND s.status like "KNOWN%"
AND ox.linkage_type <> 'DEPENDENT'
order by gene_id DESC, p.priority DESC, (ix.target_identity+ix.query_identity) DESC, ox.unused_priority DESC
DXS
......@@ -1655,6 +1658,8 @@ from ( gene_desc_prioritys p
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
AND s.status like "KNOWN%"
AND ox.linkage_type <> 'DEPENDENT'
order by gene_id desc,
p.priority desc,
(ix.target_identity+ix.query_identity) desc
......
......@@ -43,11 +43,11 @@ sub run {
$self->get_source_id_for_source_name('Uniprot/SPTREMBL', '');
$sptr_non_display_source_id =
$self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'protein_evidence_gt_3');
$self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'protein_evidence_gt_2');
print "SwissProt source id for $file: $sp_source_id\n" if ($verbose);
print "SpTREMBL source id for $file: $sptr_source_id\n" if ($verbose);
print "SpTREMBL protein_evidence > 3 source id for $file: $sptr_non_display_source_id\n" if ($verbose);
print "SpTREMBL protein_evidence > 2 source id for $file: $sptr_non_display_source_id\n" if ($verbose);
my @xrefs =
......@@ -283,7 +283,8 @@ sub create_xrefs {
}
else {
if (defined($protein_evidence_code) && $protein_evidence_code <= 3) {
#Use normal source only if it is PE levels 1 & 2
if (defined($protein_evidence_code) && $protein_evidence_code < 3) {
$xref->{SOURCE_ID} = $sptr_source_id;
$num_sptr++;
} else {
......
......@@ -89,7 +89,7 @@ CREATE TABLE source (
source_id int unsigned not null auto_increment,
name varchar(255) not null,
status enum('KNOWN','XREF','PRED','ORTH','PSEUDO','NOIDEA') not null default 'NOIDEA',
status enum('KNOWN','XREF','PRED','ORTH','PSEUDO','LOWEVIDENCE','NOIDEA') not null default 'NOIDEA',
source_release varchar(255),
download enum('Y', 'N') default 'Y',
ordered int unsigned not null,
......
......@@ -2693,16 +2693,17 @@ dependent = MIM
release_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/reldate.txt
data_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_trembl.dat.gz
[source Uniprot/SPTREMBL::MULTI-evidence_gt_3]
# Additional source for entries with evidence at protein level > 4 for Uniprot/SPTREMBL::MULTI
# These are not taken into account when deriving display xrefs.
[source Uniprot/SPTREMBL::MULTI-evidence_gt_2]
# Additional source for entries with evidence at protein level > 2 (numerically) for Uniprot/SPTREMBL::MULTI
# These are not taken into account when deriving display xrefs or assigning gene status
name = Uniprot/SPTREMBL
download = N
order = 20
priority = 2
prio_descr = protein_evidence_gt_3
prio_descr = protein_evidence_gt_2
parser = UniProtAltParser
release_uri =
status = LOWEVIDENCE
[source Uniprot/SPTREMBL::MULTI-predicted]
......
......@@ -95,14 +95,15 @@ foreach my $source_section ( sort( $config->GroupMembers('source') ) ) {
print( "INSERT INTO source "
. "(name, source_release, download, ordered, "
. "priority, priority_description)\n" );
printf( "VALUES ('%s', '1', '%s', %d, %d, '%s');\n",
. "priority, priority_description, status)\n" );
printf( "VALUES ('%s', '1', '%s', %d, %d, '%s', '%s');\n",
$config->val( $source_section, 'name' ),
$config->val( $source_section, 'download' ),
$config->val( $source_section, 'order' ),
$config->val( $source_section, 'priority' ),
$config->val( $source_section, 'prio_descr' ) );
$config->val( $source_section, 'prio_descr' ),
$config->val($source_section, 'status', 'NOIDEA') );
print("\n");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment