Commit 9c6ea761 authored by Ian Longden
Browse files

XrefMapper/XrefLoader.pm

parent 99ff8cb8
......@@ -139,6 +139,25 @@ sub update{
$sth->finish;
$transaction_end_sth->execute();
#}
######################################################
# Sort out Interpro (xrefs added by unmapped entries)
######################################################
my $interpro_test_sth = $self->xref->dbc->prepare('select count(1) from xref x, source s where x.source_id = s.source_id and s.name like "InterPro"');
$interpro_test_sth->execute();
my ($int_count);
$interpro_test_sth->bind_columns(\$int_count);
$interpro_test_sth->fetch();
if(defined($int_count) and $int_count){
my $ex_id = $name_to_external_db_id{"InterPro"};
$xref_sth->execute($ex_id);
}
$interpro_test_sth->finish;
$synonym_sth->finish;
$go_sth->finish;
$identity_sth->finish;
......@@ -193,7 +212,7 @@ sub update{
my $direct_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
# $dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
# $dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
$dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
......@@ -212,6 +231,8 @@ GSQL
my $seq_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, i.query_identity, i.target_identity, i.hit_start, i.hit_end, i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue from xref x, object_xref ox, identity_xref i where ox.ox_status = "DUMP_OUT" and i.object_xref_id = ox.object_xref_id and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
########################
# SQL to add data to core
#########################
......
......@@ -112,6 +112,27 @@ sub run {
# If no RefSeq, use the Swissprot instead
my $seen = 0;
# store as list_only first
$self->add_xref($array[0], "", $array[1], $array[2], $hgnc_desc_only, $species_id, "MISC");
if (defined($array[3])) { # dead name, add to synonym
my @array2 = split(',\s*', $array[3]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if (defined($array[4])) { # alias, add to synonym
my @array2 = split(',\s*', $array[4]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if ($array[9]){ # Ensembl direct xref
$seen =1;
$ensembl_count++;
......@@ -218,21 +239,7 @@ sub run {
}
}
}
if(!$seen){ # Store to keep descriptions etc
$self->add_xref($array[0], "", $array[1], $array[2], $hgnc_desc_only, $species_id, "MISC");
if (defined($array[3])) { # dead name, add to synonym
my @array2 = split(',\s*', $array[3]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if (defined($array[4])) { # alias, add to synonym
my @array2 = split(',\s*', $array[4]);
foreach my $arr (@array2){
XrefParser::BaseParser->add_to_syn($array[0], $hgnc_desc_only, $arr, $species_id);
}
}
if(!$seen){
$mismatch++;
}
......
......@@ -58,8 +58,9 @@ sub run_script {
my $clone_source_id =
$self->get_source_id_for_source_name('Clone_based_vega_transcript');
my $curated_source_id =
$self->get_source_id_for_source_name('HGNC_curated_transcript');
my $hgnc_source_id =
$self->get_source_id_for_source_name('HGNC','havana');
my $sql = 'select tsi.stable_id, x.display_label from xref x, object_xref ox , transcript_stable_id tsi, external_db e where e.external_db_id = x.external_db_id and x.xref_id = ox.xref_id and tsi.transcript_id = ox.ensembl_id and e.db_name like ?';
......@@ -97,15 +98,114 @@ sub run_script {
my $xref_count = 0;
my $dbi = $self->dbi();
my %synonym;
my $dbname = "HGNC";
my $syn;
my $name;
$sth = $dbi->prepare('select es.synonym, x.label from synonym es, xref x, source s where x.xref_id = es.xref_id and x.source_id = s.source_id and s.name = "EntrezGene"' );
$sth->execute();
$sth->bind_columns(\$syn,\$name);
while($sth->fetch){
$synonym{$syn} = $name;
}
$sth->finish;
$sth = $dbi->prepare('select es.synonym, x.label from synonym es, xref x, source s where x.xref_id = es.xref_id and x.source_id = s.source_id and s.name = "'.$dbname.'" and s.priority_description like "desc_only"');
$sth->execute();
$sth->bind_columns(\$syn,\$name);
while($sth->fetch){
$synonym{$syn} = $name;
}
$sth->finish;
#get the source ids for HGNC sources
my (%accession, %version, %description);
$sql = 'select source_id from source where name like "HGNC" ';
$sql .= 'and priority_description like "desc_only" ';
$sth = $dbi->prepare($sql);
$sth->execute();
my ($hgnc_source_id);
$sth->bind_columns(\$hgnc_source_id);
my @arr;
while($sth->fetch()){
push @arr, $hgnc_source_id;
}
$sth->finish;
$sql = "select accession, label, version, description from xref where source_id in (".join(", ",@arr).")";
$sth = $dbi->prepare($sql);
$sth->execute();
my ($acc, $lab, $ver, $desc);
my $hgnc_loaded_count = 0;
$sth->bind_columns(\$acc, \$lab, \$ver, \$desc);
while (my @row = $sth->fetchrow_array()) {
$accession{$lab} = $acc;
$version{$lab} = $ver;
$description{$lab} = $desc;
$hgnc_loaded_count++;
}
$sth->finish;
if($hgnc_loaded_count == 0){
die "No point continuing no hgncs there\n";
}
my $not_in_hgnc = 0;
foreach my $ott (keys %ott_to_enst){
if(defined($ott_to_vega_name{$ott})){
my $id = $curated_source_id;
my $id = $hgnc_source_id;
my $name = $ott_to_vega_name{$ott};
my $acc = undef;
my $xref_id ;
if($name =~ /[.]/){
$id = $clone_source_id;
$name =~ s/[.]\d+//; #remove .number
$xref_id = $self->add_xref($name, "" , $name , $description{$name}, $id, $species_id, "DIRECT");
}
else{
my $copy = $name;
$name =~ s/-\d+$//; #remove -number
if(defined($accession{$name})){
}
elsif(defined($synonym{$name})){
$name = $synonym{$name};
if(!defined($accession{$name})){
print "Havana name $copy which has a synonym of $name cannot be found in the HGNC data???\n";
$not_in_hgnc++;
next;
}
print "Havana uses old name $copy instead of $name\n";
}
else{
print "Havana name ($copy) $name cannot be found in the HGNC data???\n";
$not_in_hgnc++;
next;
}
$xref_id = $self->add_xref($accession{$name}, "" , $name , $description{$name}, $id, $species_id, "DIRECT");
}
my $xref_id = $self->add_xref($name, "" , $name , "", $id, $species_id, "DIRECT");
$xref_count++;
$self->add_direct_xref($xref_id, $ott_to_enst{$ott}, "transcript", "");
......@@ -113,6 +213,7 @@ sub run_script {
}
print "$xref_count direct xrefs succesfully parsed\n" if($verbose);
print "$not_in_hgnc xrefs could not be loaded as they were not in HGNC\n)" if($verbose and $not_in_hgnc);
return 0;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment