Skip to content
Snippets Groups Projects
Commit 4239b99e authored by Ian Longden's avatar Ian Longden
Browse files

DirectXrefs code moved to DirectXref.pm module and this module now only loads...

DirectXrefs code moved to DirectXref.pm module and this module now only loads data from the core database into the xref one
parent 684270d1
No related branches found
No related tags found
No related merge requests found
package XrefMapper::CoreInfo;
use strict;
use warnings;
use vars '@ISA';
@ISA = qw{ XrefMapper::BasicMapper };
use strict;
use warnings;
use XrefMapper::BasicMapper;
use Cwd;
......@@ -22,14 +22,6 @@ use IPC::Open3;
# translation_stable_id
# Also for Direct xref process these and add to object_xref
# also add dependents while we are at it.
sub new {
my($class, $mapper) = @_;
......@@ -44,203 +36,103 @@ sub new {
# Copy the core-database information the mapper needs into the xref database.
#
# Steps, in order:
#   1. set_status_for_source_from_core()  - flag xref sources as KNOWN
#   2. load_gene_transcript_translation() - fill gene_transcript_translation
#   3. load_stable_ids()                  - fill gene_stable_id,
#                                           transcript_stable_id and
#                                           translation_stable_id
#   4. record 'core_data_loaded' in process_status
#
# Direct xref processing is deliberately NOT done here any more; that code
# was moved out to DirectXref.pm, and this method only loads core data.
# Each table is therefore populated exactly once (by its helper) instead of
# once inline and once again through the helper.
#
# Takes no arguments besides the invocant and returns nothing.
sub get_core_data {
    # Shift the invocant exactly once: a second "my $self = shift" would
    # replace it with undef and break every method call below.
    my $self = shift;

    # Tables filled in the xref database by this method:
    #   gene_transcript_translation
    #   gene_stable_id
    #   transcript_stable_id
    #   translation_stable_id

    # Get the status for the sources from the core database to work out
    # statuses later.
    $self->set_status_for_source_from_core();

    # load table gene_transcript_translation
    $self->load_gene_transcript_translation();

    # load table xxx_stable_id
    $self->load_stable_ids();

    # Record that this stage has completed.
    my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_data_loaded',now())");
    $sth->execute();
    $sth->finish;

    return;
}
# Mark sources in the xref database as KNOWN when the core database lists an
# external_db of the same name whose status starts with "KNOWN".
#
# Only sources that have at least one row in the xref table are considered.
# Takes no arguments besides the invocant and returns nothing.
sub set_status_for_source_from_core {
    my $self = shift;

    # db_name => status for every KNOWN* external_db in the core database.
    my %status_of_db;
    my $core_sth = $self->core->dbc->prepare('select db_name, status from external_db where status like "KNOWN%"');
    $core_sth->execute();
    while ( my ( $db_name, $db_status ) = $core_sth->fetchrow_array() ) {
        $status_of_db{$db_name} = $db_status;
    }
    $core_sth->finish;

    my $update_sth = $self->xref->dbc->prepare("update source set status = 'KNOWN' where source_id = ?");

    # The join against xref restricts the list to sources actually in use.
    my $source_sth = $self->xref->dbc->prepare(
        'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'
    );
    $source_sth->execute();
    while ( my ( $source_id, $source_name ) = $source_sth->fetchrow_array() ) {
        next if !defined $status_of_db{$source_name};
        $update_sth->execute($source_id);
    }
    $source_sth->finish;
    $update_sth->finish;

    return;
}
# Fill the xref database's gene_transcript_translation table from the core
# database: one row per transcript, carrying its gene_id and, when the
# transcript has a translation, its translation_id (otherwise NULL, because
# the core query uses a left join).
#
# Takes no arguments besides the invocant and returns nothing.
sub load_gene_transcript_translation {
    my $self = shift;

    my $insert_sth = $self->xref->dbc->prepare(
        "insert into gene_transcript_translation (gene_id, transcript_id, translation_id) values (?, ?, ?)"
    );

    my $select_sth = $self->core->dbc->prepare(
        "select tn.gene_id, tn.transcript_id, tl.translation_id from transcript tn left join translation tl on tl.transcript_id = tn.transcript_id"
    );
    $select_sth->execute();

    # Each fetched row is (gene_id, transcript_id, translation_id) and is
    # passed straight through to the insert.
    while ( my @ids = $select_sth->fetchrow_array() ) {
        $insert_sth->execute(@ids);
    }

    $insert_sth->finish;
    $select_sth->finish;

    return;
}
# Copy the stable-id mappings for genes, transcripts and translations from
# the core database into the matching <type>_stable_id tables of the xref
# database, storing each core <type>_id as internal_id.
#
# Takes no arguments besides the invocant and returns nothing.
sub load_stable_ids {
    my $self = shift;

    for my $type (qw(gene transcript translation)) {
        my $select_sth = $self->core->dbc->prepare("select ${type}_id, stable_id from ${type}_stable_id");
        my $insert_sth = $self->xref->dbc->prepare("insert into ${type}_stable_id (internal_id, stable_id) values(?, ?)");

        $select_sth->execute();
        while ( my ( $internal_id, $stable_id ) = $select_sth->fetchrow_array() ) {
            $insert_sth->execute( $internal_id, $stable_id );
        }

        $insert_sth->finish;
        $select_sth->finish;
    }

    return;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment