Commit 9c6ea761 authored by Ian Longden
Browse files

XrefMapper/XrefLoader.pm

parent 99ff8cb8
......@@ -139,6 +139,25 @@ sub update{
$sth->finish;
$transaction_end_sth->execute();
#}
######################################################
# Sort out Interpro (xrefs added by unmapped entries)
######################################################
my $interpro_test_sth = $self->xref->dbc->prepare('select count(1) from xref x, source s where x.source_id = s.source_id and s.name like "InterPro"');
$interpro_test_sth->execute();
my ($int_count);
$interpro_test_sth->bind_columns(\$int_count);
$interpro_test_sth->fetch();
if(defined($int_count) and $int_count){
my $ex_id = $name_to_external_db_id{"InterPro"};
$xref_sth->execute($ex_id);
}
$interpro_test_sth->finish;
$synonym_sth->finish;
$go_sth->finish;
$identity_sth->finish;
......@@ -187,17 +206,17 @@ sub update{
# Now add the new ones #
#####################################
###########################
# SQL to get data from xref
###########################
my $direct_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
# $dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
$dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
###########################
# SQL to get data from xref
###########################
my $direct_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
# $dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
$dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
my $go_sql =(<<GSQL);
SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id, g.linkage_type
FROM (xref x, object_xref ox, go_xref g)
......@@ -208,28 +227,30 @@ sub update{
order by x.xref_id, ox.ensembl_id
GSQL
$go_sth = $self->xref->dbc->prepare($go_sql);
$go_sth = $self->xref->dbc->prepare($go_sql);
my $seq_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, i.query_identity, i.target_identity, i.hit_start, i.hit_end, i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue from xref x, object_xref ox, identity_xref i where ox.ox_status = "DUMP_OUT" and i.object_xref_id = ox.object_xref_id and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
my $seq_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, i.query_identity, i.target_identity, i.hit_start, i.hit_end, i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue from xref x, object_xref ox, identity_xref i where ox.ox_status = "DUMP_OUT" and i.object_xref_id = ox.object_xref_id and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
########################
# SQL to add data to core
#########################
my $add_xref_sth = $self->core->dbc->prepare('insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values (?, ?, ?, ?, ?, ?, ?, ?)');
my $add_object_xref_sth = $self->core->dbc->prepare('insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, analysis_id) values (?, ?, ?, ?, ?)');
my $add_identity_xref_sth = $self->core->dbc->prepare('insert into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
my $add_go_xref_sth = $self->core->dbc->prepare('insert into go_xref (object_xref_id, linkage_type) values (?, ?)');
my $add_dependent_xref_sth = $self->core->dbc->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)');
my $add_syn_sth = $self->core->dbc->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)');
########################
# SQL to add data to core
#########################
my $add_xref_sth = $self->core->dbc->prepare('insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values (?, ?, ?, ?, ?, ?, ?, ?)');
my $add_object_xref_sth = $self->core->dbc->prepare('insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, analysis_id) values (?, ?, ?, ?, ?)');
my $add_identity_xref_sth = $self->core->dbc->prepare('insert into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
my $add_go_xref_sth = $self->core->dbc->prepare('insert into go_xref (object_xref_id, linkage_type) values (?, ?)');
my $add_dependent_xref_sth = $self->core->dbc->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)');
my $add_syn_sth = $self->core->dbc->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)');
$sth = $self->xref->dbc->prepare('select s.name, s.source_id, count(*), x.info_type, s.priority_description from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" group by s.name, s.source_id, x.info_type');
$sth->execute();
my ($type, $source_id, $where_from);
$sth->bind_columns(\$name,\$source_id, \$count, \$type, \$where_from);
$transaction_start_sth->execute();
while($sth->fetch()){
if(defined($where_from) and $where_from ne ""){
$where_from = "Generated via $where_from";
......
......@@ -112,6 +112,27 @@ sub run {
# If no RefSeq, use the Swissprot instead
my $seen = 0;
# store as list_only first
$self->add_xref($array[0], "", $array[1], $array[2], $hgnc_desc_only, $species_id, "MISC");
if (defined($array[3])) { # dead name, add to synonym
my @array2 = split(',\s*', $array[3]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if (defined($array[4])) { # alias, add to synonym
my @array2 = split(',\s*', $array[4]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if ($array[9]){ # Ensembl direct xref
$seen =1;
$ensembl_count++;
......@@ -218,21 +239,7 @@ sub run {
}
}
}
if(!$seen){ # Store to keep descriptions etc
$self->add_xref($array[0], "", $array[1], $array[2], $hgnc_desc_only, $species_id, "MISC");
if (defined($array[3])) { # dead name, add to synonym
my @array2 = split(',\s*', $array[3]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if (defined($array[4])) { # alias, add to synonym
my @array2 = split(',\s*', $array[4]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if(!$seen){
$mismatch++;
}
......
......@@ -58,8 +58,9 @@ sub run_script {
my $clone_source_id =
$self->get_source_id_for_source_name('Clone_based_vega_transcript');
my $curated_source_id =
$self->get_source_id_for_source_name('HGNC_curated_transcript');
my $hgnc_source_id =
$self->get_source_id_for_source_name('HGNC','havana');
my $sql = 'select tsi.stable_id, x.display_label from xref x, object_xref ox , transcript_stable_id tsi, external_db e where e.external_db_id = x.external_db_id and x.xref_id = ox.xref_id and tsi.transcript_id = ox.ensembl_id and e.db_name like ?';
......@@ -97,22 +98,122 @@ sub run_script {
my $xref_count = 0;
my $dbi = $self->dbi();
my %synonym;
my $dbname = "HGNC";
my $syn;
my $name;
$sth = $dbi->prepare('select es.synonym, x.label from synonym es, xref x, source s where x.xref_id = es.xref_id and x.source_id = s.source_id and s.name = "EntrezGene"' );
$sth->execute();
$sth->bind_columns(\$syn,\$name);
while($sth->fetch){
$synonym{$syn} = $name;
}
$sth->finish;
$sth = $dbi->prepare('select es.synonym, x.label from synonym es, xref x, source s where x.xref_id = es.xref_id and x.source_id = s.source_id and s.name = "'.$dbname.'" and s.priority_description like "desc_only"');
$sth->execute();
$sth->bind_columns(\$syn,\$name);
while($sth->fetch){
$synonym{$syn} = $name;
}
$sth->finish;
#get the source ids for HGNC sources
my (%accession, %version, %description);
$sql = 'select source_id from source where name like "HGNC" ';
$sql .= 'and priority_description like "desc_only" ';
$sth = $dbi->prepare($sql);
$sth->execute();
my ($hgnc_source_id);
$sth->bind_columns(\$hgnc_source_id);
my @arr;
while($sth->fetch()){
push @arr, $hgnc_source_id;
}
$sth->finish;
$sql = "select accession, label, version, description from xref where source_id in (".join(", ",@arr).")";
$sth = $dbi->prepare($sql);
$sth->execute();
my ($acc, $lab, $ver, $desc);
my $hgnc_loaded_count = 0;
$sth->bind_columns(\$acc, \$lab, \$ver, \$desc);
while (my @row = $sth->fetchrow_array()) {
$accession{$lab} = $acc;
$version{$lab} = $ver;
$description{$lab} = $desc;
$hgnc_loaded_count++;
}
$sth->finish;
if($hgnc_loaded_count == 0){
die "No point continuing no hgncs there\n";
}
my $not_in_hgnc = 0;
foreach my $ott (keys %ott_to_enst){
if(defined($ott_to_vega_name{$ott})){
my $id = $curated_source_id;
my $id = $hgnc_source_id;
my $name = $ott_to_vega_name{$ott};
my $acc = undef;
my $xref_id ;
if($name =~ /[.]/){
$id = $clone_source_id;
$name =~ s/[.]\d+//; #remove .number
$xref_id = $self->add_xref($name, "" , $name , $description{$name}, $id, $species_id, "DIRECT");
}
else{
my $copy = $name;
$name =~ s/-\d+$//; #remove -number
if(defined($accession{$name})){
}
elsif(defined($synonym{$name})){
$name = $synonym{$name};
if(!defined($accession{$name})){
print "Havana name $copy which has a synonym of $name cannot be found in the HGNC data???\n";
$not_in_hgnc++;
next;
}
print "Havana uses old name $copy instead of $name\n";
}
else{
print "Havana name ($copy) $name cannot be found in the HGNC data???\n";
$not_in_hgnc++;
next;
}
$xref_id = $self->add_xref($accession{$name}, "" , $name , $description{$name}, $id, $species_id, "DIRECT");
}
my $xref_id = $self->add_xref($name, "" , $name , "", $id, $species_id, "DIRECT");
$xref_count++;
$self->add_direct_xref($xref_id, $ott_to_enst{$ott}, "transcript", "");
}
}
print "$xref_count direct xrefs succesfully parsed\n" if($verbose);
print "$not_in_hgnc xrefs could not be loaded as they were not in HGNC\n)" if($verbose and $not_in_hgnc);
return 0;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment