Newer
Older
package XrefMapper::DisplayXrefs;
use vars '@ISA';
@ISA = qw{ XrefMapper::BasicMapper };
use warnings;
use XrefMapper::BasicMapper;
use Cwd;
use DBI;
use File::Basename;
use IPC::Open3;
my %genes_to_transcripts;
my %translation_to_transcript;
my %transcript_to_translation;
my %transcript_length;
#
# ignore should be some sql to return object_xref_ids that should be ignored. FOR full mode METHOD
# ignore should return regexp and source name as key for update METHODS
#
sub gene_description_sources {
return ("RFAM",
Arnaud Kerhornou
committed
"RNAMMER",
"TRNASCAN_SE",
"miRBase",
"IMGT/GENE_DB",
"Uniprot/SWISSPROT",
"RefSeq_peptide",
"RefSeq_dna",
"Uniprot/Varsplic",
"Uniprot/SPTREMBL");
}
sub gene_description_filter_regexps {
return ();
}
sub transcript_display_xref_sources {
my $self = shift;
my @list = qw(HGNC
MGI
Clone_based_vega_gene
Clone_based_ensembl_gene
HGNC_transcript_name
MGI_transcript_name
Clone_based_vega_transcript
Clone_based_ensembl_transcript
miRBase
RFAM
IMGT/GENE_DB
SGD
flybase_symbol
Anopheles_symbol
Genoscope_annotated_gene
Uniprot/SWISSPROT
Uniprot/Varsplic
Uniprot/SPTREMBL
EntrezGene);
$ignore{"EntrezGene"} =(<<'IEG');
Ian Longden
committed
SELECT DISTINCT ox.object_xref_id
FROM object_xref ox, dependent_xref dx,
xref xmas, xref xdep,
source smas, source sdep
WHERE ox.xref_id = dx.dependent_xref_id AND
dx.dependent_xref_id = xdep.xref_id AND
dx.master_xref_id = xmas.xref_id AND
xmas.source_id = smas.source_id AND
xdep.source_id = sdep.source_id AND
smas.name like "Refseq%predicted" AND
sdep.name like "EntrezGene" AND
ox.ox_status = "DUMP_OUT"
IEG
Monika Komorowska
committed
$ignore{"Uniprot/SPTREMBL"} =(<<BIGN);
SELECT object_xref_id
FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
WHERE ox_status = 'DUMP_OUT' AND name = 'Uniprot/SPTREMBL'
AND priority_description = 'protein_evidence_gt_2'
Monika Komorowska
committed
BIGN
return [\@list,\%ignore];
}
sub new {
my($class, $mapper) = @_;
my $self ={};
bless $self,$class;
$self->core($mapper->core);
$self->xref($mapper->xref);
$self->mapper($mapper);
Ian Longden
committed
$self->verbose($mapper->verbose);
return $self;
}
sub mapper{
my ($self, $arg) = @_;
(defined $arg) &&
($self->{_mapper} = $arg );
return $self->{_mapper};
}
sub genes_and_transcripts_attributes_set{
# Runs build_transcript_and_gene_display_xrefs,
# build_gene_transcript_status and
# build_meta_timestamp, and, if "-upload" is set, uses the SQL files
# produced to update the core database.
my $status;
if(defined($noxref_database)){
$status = "none";
}
else{
$status = $self->mapper->xref_latest_status();
}
if($self->mapper->can("set_display_xrefs")){
$self->mapper->set_display_xrefs();
}
else{
$self->set_display_xrefs();
}
my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('display_xref_done',now())");
$sth_stat->execute();
$sth_stat->finish;
if($self->mapper->can("set_gene_descriptions")){
$self->mapper->set_gene_descriptions();
$self->set_gene_descriptions();
}
$self->set_status(); # set KNOWN,NOVEL etc
# }
$self->build_meta_timestamp;
# Special removal of LRG transcript display xref, xref and object_xrefs;
my $sth_lrg = $self->core->dbc->prepare('DELETE ox, x FROM object_xref ox, xref x, transcript t WHERE ox.xref_id = x.xref_id and t.display_xref_id = x.xref_id and t.stable_id like "LRG%"');
$sth_lrg->execute;
$sth_lrg = $self->core->dbc->prepare('UPDATE transcript SET display_xref_id = null WHERE stable_id like "LRG%" ');
$sth_lrg->execute;
#End of Special
$sth_lrg = $self->core->dbc->prepare("UPDATE xref SET info_text=null WHERE info_text=''");
$sth_lrg->execute;
if(!defined($noxref_database)){
my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('gene_description_done',now())");
$sth_stat->execute();
$sth_stat->finish;
}
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
sub set_gene_descriptions_from_display_xref{
my $self = shift;
$self->set_gene_descriptions(1);
}
sub set_display_xrefs_from_stable_table{
my $self = shift;
print "Setting Transcript and Gene display_xrefs from xref database into core and setting the desc\n" if ($self->verbose);
my $xref_offset = $self->get_meta_value("xref_offset");
print "Using xref_off set of $xref_offset\n" if($self->verbose);
my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET display_xref_id = null");
$reset_sth->execute();
$reset_sth->finish;
$reset_sth = $self->core->dbc->prepare("UPDATE transcript SET display_xref_id = null");
$reset_sth->execute();
$reset_sth->finish;
$reset_sth = $self->core->dbc->prepare("UPDATE gene SET description = null");
$reset_sth->execute();
$reset_sth->finish;
my %name_to_external_name;
my $sql = "select external_db_id, db_name, db_display_name from external_db";
my $sth = $self->core->dbc->prepare($sql);
$sth->execute();
my ($id, $name, $display_name);
$sth->bind_columns(\$id, \$name, \$display_name);
while($sth->fetch()){
$name_to_external_name{$name} = $display_name;
}
$sth->finish;
my %source_id_to_external_name;
$sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$id, \$name);
while($sth->fetch()){
if(defined($name_to_external_name{$name})){
$source_id_to_external_name{$id} = $name_to_external_name{$name};
}
}
$sth->finish;
my $update_gene_sth = $self->core->dbc->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?");
my $update_gene_desc_sth = $self->core->dbc->prepare("UPDATE gene g SET g.description= ? WHERE g.gene_id=?");
my $update_tran_sth = $self->core->dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?");
my $get_gene_display_xref = $self->xref->dbc->prepare("SELECT gsi.internal_id, gsi.display_xref_id, x.description ,x.source_id, x.accession
FROM gene_stable_id gsi, xref x
WHERE gsi.display_xref_id = x.xref_id");
my $get_tran_display_xref = $self->xref->dbc->prepare("SELECT gsi.internal_id, gsi.display_xref_id from transcript_stable_id gsi");
$reset_sth = $self->xref->dbc->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=0");
$reset_sth->execute();
my $set_desc_done_sth = $self->xref->dbc->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=1 WHERE gsi.internal_id=?");
$get_gene_display_xref->execute();
my $xref_id;
my $desc;
my $gene_id;
my $source_id;
my $label;
$get_gene_display_xref->bind_columns(\$gene_id, \$xref_id, \$desc, \$source_id, \$label);
my $gene_count =0;
while($get_gene_display_xref->fetch()){
$update_gene_sth->execute($xref_id+$xref_offset, $gene_id);
if (defined($desc) and $desc ne "") {
$desc .= " [Source:".$source_id_to_external_name{$source_id}.";Acc:".$label."]";
$update_gene_desc_sth->execute($desc,$gene_id);
$set_desc_done_sth->execute($gene_id);
$gene_count++;
}
}
$update_gene_desc_sth->finish;
$update_gene_sth->finish;
print "$gene_count gene descriptions added\n" if($self->verbose);
$get_tran_display_xref->execute();
my $tran_id;
$get_tran_display_xref->bind_columns(\$tran_id, \$xref_id);
while($get_tran_display_xref->fetch()){
if(defined($xref_id)){
$update_tran_sth->execute($xref_id+$xref_offset, $tran_id);
if(!defined($tran_id) || !defined($xref_id) || !defined($xref_offset)){
print "PROB: tran_id = $tran_id\nxref_id = $xref_id\n$xref_offset = $xref_offset\n";
}
}
}
}
sub set_status{
my $self = shift;
Ian Longden
committed
# set all genes to NOVEL
Ian Longden
committed
my $reset_sth = $self->core->dbc->prepare('UPDATE gene SET status = "NOVEL"');
$reset_sth->execute();
$reset_sth->finish;
$reset_sth = $self->core->dbc->prepare('UPDATE transcript SET status = "NOVEL"');
$reset_sth->execute();
$reset_sth->finish;
Ian Longden
committed
my $update_gene_sth = $self->core->dbc->prepare('UPDATE gene SET status = ? where gene_id = ?');
my $update_tran_sth = $self->core->dbc->prepare('UPDATE transcript SET status = ? where transcript_id = ?');
my $known_xref_sql =(<<DXS);
select distinct
IF (ox.ensembl_object_type = 'Gene', gtt_gene.gene_id,
IF (ox.ensembl_object_type = 'Transcript', gtt_transcript.gene_id,
gtt_translation.gene_id)) AS gene_id,
IF (ox.ensembl_object_type = 'Gene', gtt_gene.transcript_id,
IF (ox.ensembl_object_type = 'Transcript', gtt_transcript.transcript_id,
gtt_translation.transcript_id)) AS transcript_id
from ( source s
join ( xref x
join ( object_xref ox
) using (xref_id)
) using (source_id)
)
left join gene_transcript_translation gtt_gene
on (gtt_gene.gene_id = ox.ensembl_id)
left join gene_transcript_translation gtt_transcript
on (gtt_transcript.transcript_id = ox.ensembl_id)
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
Monika Komorowska
committed
AND s.status like 'KNOWN%'
AND ox.linkage_type <> 'DEPENDENT'
ORDER BY gene_id DESC, transcript_id DESC
DXS
my $last_gene = 0;
my $known_xref_sth = $self->xref->dbc->prepare($known_xref_sql);
$known_xref_sth->execute();
my ($gene_id, $transcript_id); # remove labvel after testig it is not needed
$known_xref_sth->bind_columns(\$gene_id, \$transcript_id);
while($known_xref_sth->fetch()){
if($gene_id != $last_gene){
Ian Longden
committed
$update_gene_sth->execute("KNOWN",$gene_id);
$last_gene = $gene_id;
}
Ian Longden
committed
$update_tran_sth->execute("KNOWN",$transcript_id);
}
# 1) load list of stable_gene_id from xref database and covert to internal id in
# new core database table.
# Use this table to reset havana gene/transcript status.
if(!scalar(keys %genes_to_transcripts)){
$self->build_genes_to_transcripts();
Ian Longden
committed
#
# Reset status for those from vega
Ian Longden
committed
#
# my %gene_id_to_status;
Ian Longden
committed
my $gene_status_sth = $self->xref->dbc->prepare("SELECT gsi.internal_id, hs.status FROM gene_stable_id gsi, havana_status hs WHERE hs.stable_id = gsi.stable_id")
|| die "Could not prepare gene_status_sth";
$gene_status_sth->execute();
my ($internal_id, $status);
$gene_status_sth->bind_columns(\$internal_id,\$status);
while($gene_status_sth->fetch()){
# $gene_id_to_status{$internal_id} = $status;
$update_gene_sth->execute($status, $internal_id);
Ian Longden
committed
}
$gene_status_sth->finish();
#
# need to create a transcript_id to status hash
#
# my %transcript_id_to_status;
Ian Longden
committed
my $transcript_status_sth = $self->xref->dbc->prepare("SELECT tsi.internal_id, hs.status FROM transcript_stable_id tsi, havana_status hs WHERE hs.stable_id = tsi.stable_id")
|| die "Could not prepare transcript_status_sth";
$transcript_status_sth->execute();
$transcript_status_sth->bind_columns(\$internal_id,\$status);
while($transcript_status_sth->fetch()){
# $transcript_id_to_status{$internal_id} = $status;
$update_tran_sth->execute($status,$internal_id);
Ian Longden
committed
}
$transcript_status_sth->finish();
$known_xref_sth->finish;
$update_gene_sth->finish;
$update_tran_sth->finish;
}
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
sub load_translation_to_transcript{
my ($self) = @_;
my $sth = $self->core->dbc->prepare("SELECT translation_id, transcript_id FROM translation");
$sth->execute();
my ($translation_id, $transcript_id);
$sth->bind_columns(\$translation_id, \$transcript_id);
while ($sth->fetch()) {
$translation_to_transcript{$translation_id} = $transcript_id;
$transcript_to_translation{$transcript_id} = $translation_id if ($translation_id);
}
}
sub build_genes_to_transcripts {
my ($self) = @_;
my $sql = "SELECT gene_id, transcript_id, seq_region_start, seq_region_end FROM transcript";
my $sth = $self->core->dbc->prepare($sql);
$sth->execute();
my ($gene_id, $transcript_id, $start, $end);
$sth->bind_columns(\$gene_id, \$transcript_id, \$start, \$end);
# Note %genes_to_transcripts is global
while ($sth->fetch()) {
push @{$genes_to_transcripts{$gene_id}}, $transcript_id;
$transcript_length{$transcript_id} = $end- $start;
}
$sth->finish
}
sub build_gene_transcript_status{
# Creates the files that contain the SQL needed to (re)set the
# gene.status and transcript.status values
my $self = shift;
my $reset_sth = $self->core->dbc->prepare('UPDATE gene SET status = "NOVEL"');
$reset_sth->execute();
$reset_sth->finish;
$reset_sth = $self->core->dbc->prepare('UPDATE transcript SET status = "NOVEL"');
$reset_sth->execute();
$reset_sth->finish;
my $update_gene_sth = $self->core->dbc->prepare('UPDATE gene SET status = "KNOWN" where gene_id = ?');
my $update_tran_sth = $self->core->dbc->prepare('UPDATE transcript SET status = "KNOWN" where transcript_id = ?');
#create a hash known which ONLY has databases names of those that are KNOWN and KNOWNXREF
my %known;
my $sth = $self->core->dbc->prepare("select db_name from external_db where status in ('KNOWNXREF','KNOWN')");
$sth->execute();
my ($name);
$sth->bind_columns(\$name);
while($sth->fetch){
$known{$name} = 1;
}
$sth->finish;
# loop throught the gene and all transcript until you find KNOWN/KNOWNXREF as a status
my $ensembl = $self->core;
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-dbconn => $ensembl->dbc);
my $gene_adaptor = $db->get_GeneAdaptor();
my @genes = @{$gene_adaptor->fetch_all()};
while (my $gene = shift @genes){
my $gene_found = 0;
my @dbentries = @{$gene->get_all_DBEntries()};
foreach my $dbe (@dbentries){
if(defined($known{$dbe->dbname})){
$gene_found =1;
}
}
my $one_tran_found = 0;
foreach my $tr (@{$gene->get_all_Transcripts}){
my $tran_found = 0;
foreach my $dbe (@{$tr->get_all_DBLinks}){
if(defined($known{$dbe->dbname})){
$tran_found = 1;
$one_tran_found = 1;
}
}
if($tran_found or $gene_found){
$update_tran_sth->execute($tr->dbID);
}
}
if($gene_found or $one_tran_found){
$update_gene_sth->execute($gene->dbID);
}
}
return;
}
sub build_meta_timestamp{
# Creates a file that contains the SQL needed to (re)set the
# 'xref.timestamp' key of the meta table.
my $self = shift;
my $sth = $self->core->dbc->prepare("DELETE FROM meta WHERE meta_key='xref.timestamp'");
$sth->execute();
$sth->finish;
$sth = $self->core->dbc->prepare("INSERT INTO meta (meta_key,meta_value) VALUES ('xref.timestamp', NOW())");
$sth->execute();
$sth->finish;
return;
}
Ian Longden
committed
sub set_display_xrefs{
Ian Longden
committed
my $self = shift;
print "Building Transcript and Gene display_xrefs using xref database\n" if ($self->verbose);
my $xref_offset = $self->get_meta_value("xref_offset");
print "Using xref_off set of $xref_offset\n" if($self->verbose);
my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET display_xref_id = null");
$reset_sth->execute();
$reset_sth->finish;
$reset_sth = $self->core->dbc->prepare("UPDATE transcript SET display_xref_id = null");
$reset_sth->execute();
$reset_sth->finish;
my $update_gene_sth = $self->core->dbc->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?");
my $update_tran_sth = $self->core->dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?");
Monika Komorowska
committed
#get hash for sources in hash
#get priority description
Ian Longden
committed
my $sql =(<<SQL);
CREATE TABLE display_xref_prioritys(
source_id INT NOT NULL,
priority INT NOT NULL,
PRIMARY KEY (source_id)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB
Ian Longden
committed
SQL
my $sth = $self->xref->dbc->prepare($sql);
$sth->execute;
$sth->finish;
my $presedence;
my $ignore;
if( $self->mapper->can("transcript_display_xref_sources") ){
($presedence, $ignore) = @{$self->mapper->transcript_display_xref_sources(1)}; # FULL update mode pass 1
}
else{
($presedence, $ignore) = @{$self->transcript_display_xref_sources(1)}; # FULL update mode pass 1
}
Ian Longden
committed
my $i=0;
my $ins_p_sth = $self->xref->dbc->prepare("INSERT into display_xref_prioritys (source_id, priority) values(?, ?)");
my $get_source_id_sth = $self->xref->dbc->prepare("select source_id from source where name like ? order by priority desc");
Ian Longden
committed
#
# So the higher the number the better then
#
my $last_name = "";
print "Presedence for the display xrefs\n" if($self->verbose);
Ian Longden
committed
foreach my $name (reverse (@$presedence)){
$i++;
$get_source_id_sth->execute($name);
my $source_id;
$get_source_id_sth->bind_columns(\$source_id);
while($get_source_id_sth->fetch){
$ins_p_sth->execute($source_id, $i);
if($name ne $last_name){
print "\t$name\t$i\n" if ($self->verbose);
}
$last_name = $name;
}
Ian Longden
committed
}
$ins_p_sth->finish;
$get_source_id_sth->finish;
# Set status to 'NO_DISPLAY' for those that match the ignore REGEXP in object_xref
# Xrefs have already been dump to core etc so no damage done.
#
my $update_ignore_sth = $self->xref->dbc->prepare('UPDATE object_xref SET ox_status = "NO_DISPLAY" where object_xref_id = ?');
foreach my $ignore_sql (values %$ignore){
print "IGNORE SQL: $ignore_sql\n" if($self->verbose);
my $ignore_sth = $self->xref->dbc->prepare($ignore_sql);
my $gene_count = 0;
$ignore_sth->execute();
my ($object_xref_id);
$ignore_sth->bind_columns(\$object_xref_id);
while($ignore_sth->fetch()){
$update_ignore_sth->execute($object_xref_id);
}
$ignore_sth->finish;
}
$update_ignore_sth->finish;
#
# Do a similar thing for those with a display_label that is just numeric;
#
$update_ignore_sth = $self->xref->dbc->prepare('UPDATE object_xref ox, source s, xref x SET ox_status = "NO_DISPLAY" where ox_status like "DUMP_OUT" and s.source_id = x.source_id and x.label REGEXP "^[0-9]+$" and ox.xref_id = x.xref_id');
$update_ignore_sth->execute();
$update_ignore_sth->finish;
Ian Longden
committed
#######################################################################
my $display_xref_sql =(<<DXS);
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
select IF (ox.ensembl_object_type = 'Gene', gtt_gene.gene_id,
IF (ox.ensembl_object_type = 'Transcript', gtt_transcript.gene_id,
gtt_translation.gene_id)) AS gene_id,
IF (ox.ensembl_object_type = 'Gene', gtt_gene.transcript_id,
IF (ox.ensembl_object_type = 'Transcript', gtt_transcript.transcript_id,
gtt_translation.transcript_id)) AS transcript_id,
p.priority as priority,
x.xref_id,
ox.ensembl_object_type as object_type,
x.label as label
from ( display_xref_prioritys p
join ( source s
join ( xref x
join ( object_xref ox
join ( identity_xref ix
) using (object_xref_id)
) using (xref_id)
) using (source_id)
) using (source_id)
)
left join gene_transcript_translation gtt_gene
on (gtt_gene.gene_id = ox.ensembl_id)
left join gene_transcript_translation gtt_transcript
on (gtt_transcript.transcript_id = ox.ensembl_id)
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
Ian Longden
committed
order by gene_id DESC, p.priority DESC, (ix.target_identity+ix.query_identity) DESC, ox.unused_priority DESC
Ian Longden
committed
DXS
Ian Longden
committed
########################################################################
my %seen_transcript; # first time we see it is the best due to ordering :-)
# so either write data to database or store
# my $gene_sth = $self->core->dbc->prepare("select x.display_label from gene g, xref x where g.display_xref_id = x.xref_id and g.gene_id = ?");
# my $tran_sth = $self->core->dbc->prepare("select x.display_label from transcript t, xref x where t.display_xref_id = x.xref_id and t.transcript_id = ?");
Ian Longden
committed
my $last_gene = 0;
my $display_xref_sth = $self->xref->dbc->prepare($display_xref_sql);
my $gene_count = 0;
Ian Longden
committed
$display_xref_sth->execute();
my ($gene_id, $transcript_id, $p, $xref_id, $type, $label); # remove labvel after testig it is not needed
$display_xref_sth->bind_columns(\$gene_id, \$transcript_id, \$p, \$xref_id, \$type, \$label);
while($display_xref_sth->fetch()){
if($gene_id != $last_gene){
$update_gene_sth->execute($xref_id+$xref_offset, $gene_id);
$last_gene = $gene_id;
$gene_count++;
}
if($type ne "Gene"){
if(!defined($seen_transcript{$transcript_id})){ # not seen yet so its the best
$update_tran_sth->execute($xref_id+$xref_offset, $transcript_id);
Ian Longden
committed
}
$seen_transcript{$transcript_id} = $xref_id+$xref_offset;
}
Ian Longden
committed
}
$display_xref_sth->finish;
$update_gene_sth->finish;
$update_tran_sth->finish;
Ian Longden
committed
# reset the status to DUMP_OUT fro thise that where ignored for the display_xref;
#
my $reset_status_sth = $self->xref->dbc->prepare('UPDATE object_xref SET ox_status = "DUMP_OUT" where ox_status = "NO_DISPLAY"');
$reset_status_sth->execute();
$reset_status_sth->finish;
$sth = $self->xref->dbc->prepare("drop table display_xref_prioritys");
$sth->execute || die "Could not drop temp table display_xref_prioritys\n";
$sth->finish;
print "Updated $gene_count display_xrefs for genes\n" if($self->verbose);
Ian Longden
committed
}
# Remove after sure everything is cool
Ian Longden
committed
my $self = shift;
my $id = shift;
my $label = shift;
my $sth = shift;
my $type = shift;
$sth->execute($id);
my $old_label;
$sth->bind_columns(\$old_label);
$sth->fetch;
if($old_label ne $label){
print "ERROR: $type ($id) has different display_xrefs ??? old:$old_label new:$label\n";
}
}
Arnaud Kerhornou
committed
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
sub set_source_id_to_external_name {
my $self = shift;
my $name_to_external_name_href = shift;
my $source_id_to_external_name_href = {};
my $name_to_source_id_href = {};
my $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest
my $sth = $self->xref->dbc->prepare($sql);
$sth->execute();
my ($id, $name);
$sth->bind_columns(\$id, \$name);
while($sth->fetch()){
if(defined($name_to_external_name_href->{$name})){
$source_id_to_external_name_href->{$id} = $name_to_external_name_href->{$name};
$name_to_source_id_href->{$name} = $id;
}
elsif($name =~ /notransfer$/){
}
else{
die "ERROR: Could not find $name in external_db table please add this too continue";
}
}
$sth->finish;
return ($source_id_to_external_name_href, $name_to_source_id_href);
}
sub set_gene_descriptions{
my $only_those_not_set = shift || 0;
my $update_gene_desc_sth = $self->core->dbc->prepare("UPDATE gene SET description = ? where gene_id = ?");
if(!$only_those_not_set){
my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET description = null");
$reset_sth->execute();
$reset_sth->finish;
}
my %ignore;
if($only_those_not_set){
print "Only setting those not already set\n";
my $sql = "select internal_id from gene_stable_id where desc_set = 1";
my $sql_sth = $self->xref->dbc->prepare($sql);
$sql_sth->execute;
my $id;
$sql_sth->bind_columns(\$id);
while($sql_sth->fetch){
$ignore{$id} = 1;
}
$sql_sth->finish;
}
##########################################
# Get source_id to external_disaply_name #
##########################################
my %name_to_external_name;
my $sql = "select external_db_id, db_name, db_display_name from external_db";
my $sth = $self->core->dbc->prepare($sql);
$sth->execute();
my ($id, $name, $display_name);
$sth->bind_columns(\$id, \$name, \$display_name);
while($sth->fetch()){
$name_to_external_name{$name} = $display_name;
}
$sth->finish;
Arnaud Kerhornou
committed
my ($source_id_to_external_name_href, $name_to_source_id_href);
if( $self->mapper->can("set_source_id_to_external_name") ){
($source_id_to_external_name_href, $name_to_source_id_href) = $self->mapper->set_source_id_to_external_name (\%name_to_external_name);
}
else{
($source_id_to_external_name_href, $name_to_source_id_href) = $self->set_source_id_to_external_name (\%name_to_external_name);
}
Arnaud Kerhornou
committed
my %source_id_to_external_name = %$source_id_to_external_name_href;
my %name_to_source_id = %$name_to_source_id_href;
CREATE TABLE gene_desc_prioritys(
source_id INT NOT NULL,
priority INT NOT NULL,
PRIMARY KEY (source_id)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB
$sth = $self->xref->dbc->prepare($sql);
$sth->execute;
$sth->finish;
my @presedence;
my @regexps;
if( $self->mapper->can("gene_description_sources") ){
@presedence = $self->mapper->gene_description_sources();
}
else{
@presedence = $self->gene_description_sources();
}
if( $self->mapper->can("gene_description_filter_regexps") ){
@regexps = $self->mapper->gene_description_filter_regexps();
}
else{
@regexps = $self->gene_description_filter_regexps();
}
my $i=0;
my $ins_p_sth = $self->xref->dbc->prepare("INSERT into gene_desc_prioritys (source_id, priority) values(?, ?)");
my $get_source_id_sth = $self->xref->dbc->prepare("select source_id from source where name like ?");
#
# So the higher the number the better then
#
print "Presedence for Gene Descriptions\n" if($self->verbose);
my $last_name = "";
foreach my $name (reverse (@presedence)){
$i++;
$get_source_id_sth->execute($name);
my $source_id;
$get_source_id_sth->bind_columns(\$source_id);
while($get_source_id_sth->fetch){
$ins_p_sth->execute($source_id, $i);
if($last_name ne $name){
print "\t$name\t$i\n" if ($self->verbose);
}
$last_name = $name;
}
}
$ins_p_sth->finish;
$get_source_id_sth->finish;
#######################################################################
my $gene_desc_sql =(<<DXS);
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
select IF (ox.ensembl_object_type = 'Gene', gtt_gene.gene_id,
IF (ox.ensembl_object_type = 'Transcript', gtt_transcript.gene_id,
gtt_translation.gene_id)) AS gene_id,
x.description AS description,
s.source_id AS source_id,
x.accession AS accession
from ( gene_desc_prioritys p
join ( source s
join ( xref x
join ( object_xref ox
join ( identity_xref ix
) using (object_xref_id)
) using (xref_id)
) using (source_id)
) using (source_id)
)
left join gene_transcript_translation gtt_gene
on (gtt_gene.gene_id = ox.ensembl_id)
left join gene_transcript_translation gtt_transcript
on (gtt_transcript.transcript_id = ox.ensembl_id)
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
order by gene_id desc,
p.priority desc,
(ix.target_identity+ix.query_identity) desc
########################################################################
my $gene_sth = $self->core->dbc->prepare("select g.description from gene g where g.gene_id = ?");
my $last_gene = 0;
my %no_source_name_in_desc;
if( $self->mapper->can("no_source_label_list") ){
foreach my $name (@{$self->mapper->no_source_label_list()}){
my $id = $name_to_source_id{$name};
print "$name will not have [Source:...] info in desc\n";
$no_source_name_in_desc{$id} = 1;
}
}
my $gene_desc_sth = $self->xref->dbc->prepare($gene_desc_sql);
$gene_desc_sth->execute();
my ($gene_id, $desc,$source_id,$label); # remove labvel after testig it is not needed
$gene_desc_sth->bind_columns(\$gene_id, \$desc, \$source_id, \$label);
my $gene_count = 0;
while($gene_desc_sth->fetch()){
# print "$gene_id, $transcript_id, $p, $xref_id, $type, $label\n";
next if(defined($ignore{$gene_id}));
Ian Longden
committed
if($gene_id != $last_gene and defined($desc) ){
my $filtered_description = $self->filter_by_regexp($desc, \@regexps);
if ($filtered_description ne "") {
if(!defined($no_source_name_in_desc{$source_id})){
$desc .= " [Source:".$source_id_to_external_name{$source_id}.";Acc:".$label."]";
}
$update_gene_desc_sth->execute($desc,$gene_id);
$gene_count++;
$last_gene = $gene_id;
}
$update_gene_desc_sth->finish;
print "$gene_count gene descriptions added\n";# if($self->verbose);
$sth = $self->xref->dbc->prepare("drop table gene_desc_prioritys");
$sth->execute || die "Could not drop temp table gene_desc_prioritys\n";
$sth->finish;
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
}
sub filter_by_regexp {
my ($self, $str, $regexps) = @_;
foreach my $regexp (@$regexps) {
$str =~ s/$regexp//ig;
}
return $str;
}
sub check_desc{
my $self = shift;
my $id = shift;
my $desc = shift;
my $sth = shift;
my $type = shift;
$sth->execute($id);
my $old_desc;
$sth->bind_columns(\$old_desc);
$sth->fetch;
if($old_desc ne $desc){
print "ERROR: $type ($id) has different descriptions ??? \n\told:$old_desc \n\tnew:$desc\n";
Ian Longden
committed