From 69a6a321c2f70fd86dcbf6a196e4cf0196b03161 Mon Sep 17 00:00:00 2001 From: Magali Ruffier <mr6@ebi.ac.uk> Date: Wed, 17 Jan 2018 13:06:47 +0000 Subject: [PATCH] make dbconnection persistent --- .../xref_mapping/XrefMapper/BasicMapper.pm | 20 +- .../xref_mapping/XrefMapper/ChecksumMapper.pm | 9 +- .../XrefMapper/CoordinateMapper.pm | 1 + .../xref_mapping/XrefMapper/DirectXrefs.pm | 108 ++++--- .../xref_mapping/XrefMapper/DisplayXrefs.pm | 102 ++++--- .../xref_mapping/XrefMapper/OfficialNaming.pm | 251 ++++++++------- .../XrefMapper/ProcessMappings.pm | 52 ++-- .../xref_mapping/XrefMapper/ProcessPaired.pm | 45 ++- .../XrefMapper/ProcessPrioritys.pm | 53 ++-- .../xref_mapping/XrefMapper/TestMappings.pm | 56 ++-- .../xref_mapping/XrefMapper/XrefLoader.pm | 146 ++++----- .../XrefParser/ArrayExpressParser.pm | 12 +- .../xref_mapping/XrefParser/BaseParser.pm | 289 ++++++++---------- .../xref_mapping/XrefParser/CCDSParser.pm | 5 +- .../xref_mapping/XrefParser/CoreXrefParser.pm | 11 +- .../xref_mapping/XrefParser/DBASSParser.pm | 9 +- .../XrefParser/EntrezGeneParser.pm | 10 +- .../xref_mapping/XrefParser/HGNCParser.pm | 36 ++- .../XrefParser/HGNC_CCDSParser.pm | 9 +- .../xref_mapping/XrefParser/HPAParser.pm | 7 +- .../xref_mapping/XrefParser/JGI_Parser.pm | 6 +- .../xref_mapping/XrefParser/MGIParser.pm | 10 +- .../XrefParser/MGI_CCDS_Parser.pm | 5 +- .../XrefParser/MGI_Desc_Parser.pm | 5 +- .../xref_mapping/XrefParser/MIMParser.pm | 12 +- .../xref_mapping/XrefParser/Mim2GeneParser.pm | 13 +- .../xref_mapping/XrefParser/PomBaseParser.pm | 14 +- .../xref_mapping/XrefParser/RFAMParser.pm | 14 +- .../xref_mapping/XrefParser/RGDParser.pm | 13 +- .../xref_mapping/XrefParser/ReactomeParser.pm | 24 +- .../XrefParser/RefSeqCoordinateParser.pm | 27 +- .../XrefParser/RefSeqGPFFParser.pm | 59 ++-- .../xref_mapping/XrefParser/RefSeqParser.pm | 36 +-- .../XrefParser/RefSeq_CCDSParser.pm | 11 +- .../xref_mapping/XrefParser/SGDParser.pm | 14 +- .../xref_mapping/XrefParser/UCSCParser.pm | 
5 +- .../XrefParser/UniProtDirectParser.pm | 8 +- .../xref_mapping/XrefParser/UniProtParser.pm | 43 +-- .../UniProtParser_descriptions_only.pm | 30 +- .../xref_mapping/XrefParser/VGNCParser.pm | 10 +- .../XrefParser/XenopusJamboreeParser.pm | 3 + .../xref_mapping/XrefParser/ZFINParser.pm | 15 +- .../xref_mapping/XrefParser/miRBaseParser.pm | 13 +- 43 files changed, 862 insertions(+), 759 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm index 44f040e77a..7cfe346cd6 100644 --- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm @@ -490,7 +490,6 @@ sub get_id_from_species_name { while(my @row2 = $sth->fetchrow_array()){ print STDERR $row2[0]."\n"; } - croak("Please try again :-)\n"); } $sth->finish(); @@ -651,8 +650,8 @@ sub get_official_name { # i.e. move all HGNC from transcripts to Genes. # sub biomart_fix{ - my ($self, $db_name, $type1, $type2, $verbose) = @_; - my $xref_dbc = $self->xref->dbc; + my ($self, $db_name, $type1, $type2, $verbose, $xref_dbc) = @_; + $xref_dbc = $self->xref->dbc unless defined $xref_dbc; print "$db_name is associated with both $type1 and $type2 object types\n" if(defined($verbose)); print "$db_name moved to Gene level.\n" if(!defined($verbose)); @@ -702,7 +701,6 @@ sub biomart_fix{ source.name = "$db_name"; EOF my $result = $xref_dbc->do($sql) ; -# print "\n$sql\n"; if($db_name eq "GO" || $db_name eq 'goslim_goa'){ $sql =(<<"EOF2"); @@ -867,7 +865,6 @@ sub get_species_id_from_species_name{ while(my @row2 = $sth->fetchrow_array()){ print STDERR $row2[0]."\n"; } - croak("Please try again :-)\n"); } $sth->finish(); @@ -1031,6 +1028,8 @@ sub get_alt_allele_hashes{ sub process_alt_alleles{ my $self = shift; + my $dbc = shift; + $dbc = $self->xref->dbc unless defined $dbc; # ALL are on the Gene level now. This may change but for now it is okay. 
my ($alt_to_ref, $ref_to_alts) = $self->get_alt_allele_hashes(); @@ -1086,9 +1085,9 @@ DELETE ox DEL $del_sql .= "'".join("', '",$self->get_gene_specific_list()) . "')"; - my $move_sth = $self->xref->dbc->prepare($move_sql) || croak "$move_sql cannot be prepared"; - my $del_ix_sth = $self->xref->dbc->prepare($del_ix_sql) || croak "$del_ix_sql cannot be prepared"; - my $del_sth = $self->xref->dbc->prepare($del_sql) || croak "$del_sql cannot be prepared"; + my $move_sth = $dbc->prepare($move_sql) || croak "$move_sql cannot be prepared"; + my $del_ix_sth = $dbc->prepare($del_ix_sql) || croak "$del_ix_sql cannot be prepared"; + my $del_sth = $dbc->prepare($del_sql) || croak "$del_sql cannot be prepared"; my $move_count = 0; my $del_ix_count = 0; @@ -1229,14 +1228,15 @@ sub get_gene_specific_list { # sub source_defined_move{ my $self = shift; + my $dbi = shift; my $tester = XrefMapper::TestMappings->new($self); if($tester->unlinked_entries){ croak "Problems found before source_defined_move\n"; } foreach my $source ($self->get_gene_specific_list()){ - $self->biomart_fix($source,"Translation","Gene"); - $self->biomart_fix($source,"Transcript","Gene"); + $self->biomart_fix($source,"Translation","Gene", undef, undef, $dbi); + $self->biomart_fix($source,"Transcript","Gene", undef, undef, $dbi); } if($tester->unlinked_entries){ croak "Problems found after source_defined_move\n"; diff --git a/misc-scripts/xref_mapping/XrefMapper/ChecksumMapper.pm b/misc-scripts/xref_mapping/XrefMapper/ChecksumMapper.pm index 26e7a8de8f..2f5d5707d9 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ChecksumMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ChecksumMapper.pm @@ -71,7 +71,7 @@ sub target { } sub process { - my ($self, $db_url) = @_; + my ($self, $db_url, $species_id) = @_; $self->_update_status('checksum_xrefs_started'); my $source_id = $self->source_id(); @@ -82,7 +82,7 @@ sub process { my $method = $self->get_method(); my $results = $method->run($target, $source_id, 
$object_type, $db_url); $self->log_progress('Starting upload'); - $self->upload($results); + $self->upload($results, $species_id); } $self->_update_status('checksum_xrefs_finished'); @@ -90,7 +90,7 @@ sub process { } sub upload { - my ($self, $results) = @_; + my ($self, $results, $species_id) = @_; #The elements come in as an array looking like # [ { id => 1, upi => 'UPI00000A', object_type => 'Translation' } ] @@ -105,7 +105,8 @@ SQL my $h = $self->_xref_helper(); my $source_id = $self->source_id(); - my $species_id = $self->species_id(); + $species_id = $self->species_id() unless defined $species_id; + if (!defined $species_id) { return; } $h->transaction(-CALLBACK => sub { diff --git a/misc-scripts/xref_mapping/XrefMapper/CoordinateMapper.pm b/misc-scripts/xref_mapping/XrefMapper/CoordinateMapper.pm index 9d95d622dd..bbc6d9d600 100644 --- a/misc-scripts/xref_mapping/XrefMapper/CoordinateMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/CoordinateMapper.pm @@ -86,6 +86,7 @@ sub run_coordinatemapping { my $species = $core_db->species(); # my $species_id = $self->mapper->core->species; my $species_id = XrefMapper::BasicMapper::get_species_id_from_species_name( $xref_db, $species ); + if (!defined $species_id) { return; } # We only do coordinate mapping for mouse and human for now. 
diff --git a/misc-scripts/xref_mapping/XrefMapper/DirectXrefs.pm b/misc-scripts/xref_mapping/XrefMapper/DirectXrefs.pm index 76837d81c1..93c1477629 100644 --- a/misc-scripts/xref_mapping/XrefMapper/DirectXrefs.pm +++ b/misc-scripts/xref_mapping/XrefMapper/DirectXrefs.pm @@ -45,15 +45,14 @@ sub new { sub get_ins_ix_sth { my $self = shift; + my $dbi = shift; - if(!defined($self->{'_ins_ix_sth'})){ - my $sql = (<<"IIX"); + my $sql = (<<"IIX"); INSERT IGNORE INTO identity_xref (object_xref_id, query_identity, target_identity) VALUES (?, 100, 100) IIX - $self->{'_ins_ix_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_ins_ix_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } @@ -63,22 +62,19 @@ sub process { # Now process the direct xrefs and add data to the object xrefs remember dependent xrefs. my $object_xref_id; - my $ox_sth = $self->xref->dbc->prepare("select max(object_xref_id) from object_xref"); - $ox_sth->execute(); - $ox_sth->bind_columns(\$object_xref_id); - $ox_sth->fetch(); - $ox_sth->finish; + my $dbi = $self->xref->dbc; # First get the sths needed for the processing of the direct xrefs; my $ins_ox_sql = (<<"IOS"); -INSERT INTO object_xref (object_xref_id, ensembl_id, xref_id, ensembl_object_type, linkage_type) - VALUES (?, ?, ?, ?, ?) +INSERT INTO object_xref (ensembl_id, xref_id, ensembl_object_type, linkage_type) + VALUES (?, ?, ?, ?) 
IOS - my $ins_ox_sth = $self->xref->dbc->prepare($ins_ox_sql); + my $ins_ox_sth = $dbi->prepare($ins_ox_sql); + my $get_object_xref_id_sth = $self->get_ox_id_sth($dbi); # Direct xrefs can be considered to be 100% matching - my $ins_ix_sth = $self->get_ins_ix_sth(); + my $ins_ix_sth = $self->get_ins_ix_sth($dbi); my $stable_sql=(<<"SQL"); SELECT so.name, dx.general_xref_id, s.internal_id, dx.ensembl_stable_id , dx.linkage_xref @@ -101,7 +97,7 @@ SQL my ($dbname, $xref_id, $internal_id, $stable_id, $linkage_type); my $sql = $stable_sql; $sql =~ s/TYPE/$table/g; - my $sth = $self->xref->dbc->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$dbname, \$xref_id, \$internal_id, \$stable_id, \$linkage_type); my $count =0; @@ -126,7 +122,9 @@ SQL if($internal_id == 0){ die "Problem could not find stable id $stable_id and got past the first check for $dbname\n"; } - $ins_ox_sth->execute($object_xref_id, $internal_id, $xref_id, $table, 'DIRECT'); + $ins_ox_sth->execute($internal_id, $xref_id, $table, 'DIRECT'); + $get_object_xref_id_sth->execute($internal_id, $xref_id, $table, 'DIRECT'); + $object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0]; if($ins_ox_sth->err){ $duplicate_direct_count++; next; #duplicate @@ -136,10 +134,10 @@ SQL push @master_xref_ids, $xref_id; } $self->process_dependents({master_xrefs => \@master_xref_ids, - max_object_xref_id => \$object_xref_id, dup_count => \$duplicate_dependent_count, table => $table, - internal_id => $internal_id + internal_id => $internal_id, + dbi => $dbi, }); } @@ -152,7 +150,7 @@ SQL print STDERR "*WARNING*: ".$err_count{$key}." direct xrefs for database ".$key." 
could not be added as their stable_ids could not be found\n"; } - my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('direct_xrefs_parsed',now())"); + my $sth = $dbi->prepare("insert into process_status (status, date) values('direct_xrefs_parsed',now())"); $sth->execute(); $sth->finish; @@ -162,72 +160,94 @@ SQL sub get_dep_sth { my $self = shift; + my $dbi = shift; - if(!defined($self->{'_dep_sth'})){ - my $dep_sql = (<<"DSS"); + my $dep_sql = (<<"DSS"); SELECT dependent_xref_id, linkage_annotation FROM dependent_xref WHERE master_xref_id = ? DSS - $self->{'_dep_sth'} = $self->xref->dbc->prepare($dep_sql); - } - return $self->{'_dep_sth'}; + my $sth = $dbi->prepare($dep_sql); + return $sth; } sub get_dep_go_sth { my $self = shift; + my $dbi = shift; - if(!defined($self->{'_dep_go_sth'})){ - my $sql = (<<"IGO"); + my $sql = (<<"IGO"); INSERT INTO go_xref (object_xref_id, linkage_type, source_xref_id) VALUES (?,?,?) IGO - $self->{'_dep_go_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_dep_go_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } sub get_add_dep_ox { my $self = shift; + my $dbi = shift; - if(!defined($self->{'_add_dep_ox_sth'})){ - my $sql = (<<"IO2"); -INSERT INTO object_xref (object_xref_id, ensembl_id, xref_id, ensembl_object_type, linkage_type, master_xref_id) - VALUES (?, ?, ?, ?, ?, ?) + my $sql = (<<"IO2"); +INSERT INTO object_xref (ensembl_id, xref_id, ensembl_object_type, linkage_type, master_xref_id) + VALUES (?, ?, ?, ?, ?) IO2 - $self->{'_add_dep_ox_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_add_dep_ox_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; +} + +sub get_ox_id_sth { + my $self = shift; + my $dbi = shift; + + my $sql = (<<"IO2"); +select object_xref_id from object_xref where ensembl_id = ? and xref_id = ? and ensembl_object_type = ? and linkage_type = ? 
+IO2 + my $sth = $dbi->prepare($sql); + return $sth; +} + +sub get_ox_id_master_sth { + my $self = shift; + my $dbi = shift; + + my $sql = (<<"IO2"); +select object_xref_id from object_xref where ensembl_id = ? and xref_id = ? and ensembl_object_type = ? and linkage_type = ? and master_xref_id = ? +IO2 + my $sth = $dbi->prepare($sql); + return $sth; } sub process_dependents { my ($self, $arg_ref) = @_; + my $dbi = $arg_ref->{dbi}; my $master_xref_ids = $arg_ref->{master_xrefs}; - my $object_xref_id = $arg_ref->{max_object_xref_id}; my $duplicate_dep_count = $arg_ref->{dup_count}; my $table = $arg_ref->{table}; my $internal_id = $arg_ref->{internal_id}; - my $dep_sth = $self->get_dep_sth(); - my $ins_go_dep_sth = $self->get_dep_go_sth(); - my $ins_ox_sth2 = $self->get_add_dep_ox(); - my $ins_ix_sth = $self->get_ins_ix_sth(); + my $dep_sth = $self->get_dep_sth($dbi); + my $ins_go_dep_sth = $self->get_dep_go_sth($dbi); + my $ins_ox_sth2 = $self->get_add_dep_ox($dbi); + my $ins_ix_sth = $self->get_ins_ix_sth($dbi); + my $get_object_xref_id_sth = $self->get_ox_id_master_sth($dbi); local $ins_ox_sth2->{RaiseError} = 0; # want to see duplicates and not die automatically local $ins_ox_sth2->{PrintError} = 0; + my $object_xref_id; while(my $master_xref_id = pop(@$master_xref_ids)){ my ($dep_xref_id, $link); $dep_sth->execute($master_xref_id); $dep_sth->bind_columns(\$dep_xref_id, \$link); while($dep_sth->fetch){ - $$object_xref_id++; - $ins_ox_sth2->execute($$object_xref_id, $internal_id, $dep_xref_id, $table, 'DEPENDENT', $master_xref_id); + $ins_ox_sth2->execute($internal_id, $dep_xref_id, $table, 'DEPENDENT', $master_xref_id); + $get_object_xref_id_sth->execute($internal_id, $dep_xref_id, $table, 'DEPENDENT', $master_xref_id); + $object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0]; if($ins_ox_sth2->err){ my $err = $ins_ox_sth2->errstr; if($err =~ /Duplicate/){ @@ -238,7 +258,7 @@ sub process_dependents { die "Problem loading error is $err\n"; } } - 
$ins_ix_sth->execute($$object_xref_id); + $ins_ix_sth->execute($object_xref_id); push @$master_xref_ids, $dep_xref_id; # get the dependent, dependents just in case if(defined($link) and $link ne ""){ # we have a go term linkage type diff --git a/misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm b/misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm index 561280c30b..0b0b8d41dd 100644 --- a/misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm +++ b/misc-scripts/xref_mapping/XrefMapper/DisplayXrefs.pm @@ -213,25 +213,27 @@ sub set_display_xrefs_from_stable_table{ print "Setting Transcript and Gene display_xrefs from xref database into core and setting the desc\n" if ($self->verbose); my $xref_offset = $self->get_meta_value("xref_offset"); + my $core_dbi = $self->core->dbc; + my $xref_dbi = $self->xref->dbc; print "Using xref_off set of $xref_offset\n" if($self->verbose); - my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET display_xref_id = null"); + my $reset_sth = $core_dbi->prepare("UPDATE gene SET display_xref_id = null"); $reset_sth->execute(); $reset_sth->finish; - $reset_sth = $self->core->dbc->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); + $reset_sth = $core_dbi->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); $reset_sth->execute(); $reset_sth->finish; - $reset_sth = $self->core->dbc->prepare("UPDATE gene SET description = null"); + $reset_sth = $core_dbi->prepare("UPDATE gene SET description = null"); $reset_sth->execute(); $reset_sth->finish; my %name_to_external_name; my $sql = "select external_db_id, db_name, db_display_name from external_db"; - my $sth = $self->core->dbc->prepare($sql); + my $sth = $core_dbi->prepare($sql); $sth->execute(); my ($id, $name, $display_name); $sth->bind_columns(\$id, \$name, \$display_name); @@ -243,7 +245,7 @@ sub set_display_xrefs_from_stable_table{ my %source_id_to_external_name; $sql = 'select s.source_id, s.name from 
source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest - $sth = $self->xref->dbc->prepare($sql); + $sth = $xref_dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$id, \$name); @@ -255,21 +257,21 @@ sub set_display_xrefs_from_stable_table{ $sth->finish; - my $update_gene_sth = $self->core->dbc->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?"); - my $update_gene_desc_sth = $self->core->dbc->prepare("UPDATE gene g SET g.description= ? WHERE g.gene_id=?"); + my $update_gene_sth = $core_dbi->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?"); + my $update_gene_desc_sth = $core_dbi->prepare("UPDATE gene g SET g.description= ? WHERE g.gene_id=?"); - my $update_tran_sth = $self->core->dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?"); + my $update_tran_sth = $core_dbi->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?"); - my $get_gene_display_xref = $self->xref->dbc->prepare("SELECT gsi.internal_id, gsi.display_xref_id, x.description ,x.source_id, x.accession + my $get_gene_display_xref = $xref_dbi->prepare("SELECT gsi.internal_id, gsi.display_xref_id, x.description ,x.source_id, x.accession FROM gene_stable_id gsi, xref x WHERE gsi.display_xref_id = x.xref_id"); - my $get_tran_display_xref = $self->xref->dbc->prepare("SELECT gsi.internal_id, gsi.display_xref_id from transcript_stable_id gsi"); + my $get_tran_display_xref = $xref_dbi->prepare("SELECT gsi.internal_id, gsi.display_xref_id from transcript_stable_id gsi"); - $reset_sth = $self->xref->dbc->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=0"); + $reset_sth = $xref_dbi->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=0"); $reset_sth->execute(); - my $set_desc_done_sth = $self->xref->dbc->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=1 WHERE gsi.internal_id=?"); + my $set_desc_done_sth = $xref_dbi->prepare("UPDATE gene_stable_id gsi SET gsi.desc_set=1 
WHERE gsi.internal_id=?"); $get_gene_display_xref->execute(); my $xref_id; @@ -315,7 +317,7 @@ sub set_display_xrefs_from_stable_table{ # Synonyms are only used as alternative gene names, so should be synonyms of the gene symbol chosen # - my $syn_clean_sth = $self->core->dbc->prepare("DELETE es FROM external_synonym es, xref x LEFT JOIN gene g ON g.display_xref_id = x.xref_id WHERE es.xref_id = x.xref_id AND isnull(g.display_xref_id)"); + my $syn_clean_sth = $core_dbi->prepare("DELETE es FROM external_synonym es, xref x LEFT JOIN gene g ON g.display_xref_id = x.xref_id WHERE es.xref_id = x.xref_id AND isnull(g.display_xref_id)"); $syn_clean_sth->execute(); $syn_clean_sth->finish(); @@ -384,35 +386,37 @@ sub set_display_xrefs{ print "Building Transcript and Gene display_xrefs\n" if ($self->verbose); my $xref_offset = $self->get_meta_value("xref_offset"); + my $core_dbi = $self->core->dbc(); + my $xref_dbi = $self->xref->dbc(); print "Using xref_off set of $xref_offset\n" if($self->verbose); - my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET display_xref_id = null"); + my $reset_sth = $core_dbi->prepare("UPDATE gene SET display_xref_id = null"); $reset_sth->execute(); $reset_sth->finish; - $reset_sth = $self->core->dbc->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); + $reset_sth = $core_dbi->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); $reset_sth->execute(); $reset_sth->finish; - my $update_gene_sth = $self->core->dbc->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?"); - my $update_tran_sth = $self->core->dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?"); + my $update_gene_sth = $core_dbi->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?"); + my $update_tran_sth = $core_dbi->prepare("UPDATE transcript t SET t.display_xref_id= ? 
WHERE t.transcript_id=?"); # Set status to 'NO_DISPLAY' for object_xrefs with a display_label that is just numeric; - my $update_ignore_sth = $self->xref->dbc->prepare('UPDATE object_xref ox, source s, xref x SET ox_status = "NO_DISPLAY" where ox_status like "DUMP_OUT" and s.source_id = x.source_id and x.label REGEXP "^[0-9]+$" and ox.xref_id = x.xref_id'); + my $update_ignore_sth = $xref_dbi->prepare('UPDATE object_xref ox, source s, xref x SET ox_status = "NO_DISPLAY" where ox_status like "DUMP_OUT" and s.source_id = x.source_id and x.label REGEXP "^[0-9]+$" and ox.xref_id = x.xref_id'); $update_ignore_sth->execute(); $update_ignore_sth->finish; - my $ins_p_sth = $self->xref->dbc->prepare("INSERT ignore into display_xref_priority (ensembl_object_type,source_id, priority) values(?, ?, ?)"); - my $get_source_id_sth = $self->xref->dbc->prepare("select source_id from source where name like ? order by priority"); - my $list_sources_sth = $self->xref->dbc->prepare("select distinct name from display_xref_priority d join source using(source_id) where ensembl_object_type = ? order by d.priority"); + my $ins_p_sth = $xref_dbi->prepare("INSERT ignore into display_xref_priority (ensembl_object_type,source_id, priority) values(?, ?, ?)"); + my $get_source_id_sth = $xref_dbi->prepare("select source_id from source where name like ? order by priority"); + my $list_sources_sth = $xref_dbi->prepare("select distinct name from display_xref_priority d join source using(source_id) where ensembl_object_type = ? 
order by d.priority"); - $update_ignore_sth = $self->xref->dbc->prepare('UPDATE object_xref SET ox_status = "NO_DISPLAY" where object_xref_id = ?'); + $update_ignore_sth = $xref_dbi->prepare('UPDATE object_xref SET ox_status = "NO_DISPLAY" where object_xref_id = ?'); my %object_types = ('gene' => 'Gene', 'transcript' => 'Transcript'); @@ -461,7 +465,7 @@ sub set_display_xrefs{ foreach my $ignore_sql (values %$ignore){ print "IGNORE SQL: $ignore_sql\n" if($self->verbose); - my $ignore_sth = $self->xref->dbc->prepare($ignore_sql); + my $ignore_sth = $xref_dbi->prepare($ignore_sql); $ignore_sth->execute(); my ($object_xref_id); $ignore_sth->bind_columns(\$object_xref_id); @@ -518,7 +522,7 @@ DXS my %object_seen; - my $display_xref_sth = $self->xref->dbc->prepare($display_xref_sql); + my $display_xref_sth = $xref_dbi->prepare($display_xref_sql); my $display_xref_count = 0; $display_xref_sth->execute($object_type); @@ -555,7 +559,7 @@ DXS # reset the status to DUMP_OUT fro object_xrefs that where ignored for the display_xref; # - my $reset_status_sth = $self->xref->dbc->prepare('UPDATE object_xref SET ox_status = "DUMP_OUT" where ox_status = "NO_DISPLAY"'); + my $reset_status_sth = $xref_dbi->prepare('UPDATE object_xref SET ox_status = "DUMP_OUT" where ox_status = "NO_DISPLAY"'); $reset_status_sth->execute(); $reset_status_sth->finish; @@ -564,7 +568,7 @@ DXS # Synonyms are only used as alternative gene names, so should be synonyms of the gene symbol chosen # - my $syn_clean_sth = $self->core->dbc->prepare("DELETE es FROM external_synonym es, xref x LEFT JOIN gene g ON g.display_xref_id = x.xref_id WHERE es.xref_id = x.xref_id AND isnull(g.display_xref_id)"); + my $syn_clean_sth = $core_dbi->prepare("DELETE es FROM external_synonym es, xref x LEFT JOIN gene g ON g.display_xref_id = x.xref_id WHERE es.xref_id = x.xref_id AND isnull(g.display_xref_id)"); $syn_clean_sth->execute(); $syn_clean_sth->finish(); @@ -574,25 +578,27 @@ DXS sub transcript_names_from_gene { my 
$self = shift; + my $core_dbi = $self->core->dbc; + my $xref_dbi = $self->xref->dbc; print "Assigning transcript names from gene names\n" if ($self->verbose); - my $reset_sth = $self->core->dbc->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); + my $reset_sth = $core_dbi->prepare("UPDATE transcript SET display_xref_id = null WHERE biotype NOT IN ('LRG_gene')"); $reset_sth->execute(); $reset_sth->finish; - my $xref_id_sth = $self->core->dbc->prepare("SELECT max(xref_id) FROM xref"); - my $ox_id_sth = $self->core->dbc->prepare("SELECT max(object_xref_id) FROM object_xref"); - my $del_xref_sth = $self->core->dbc->prepare("DELETE x FROM xref x, object_xref ox WHERE x.xref_id = ox.xref_id AND ensembl_object_type = 'Transcript' AND display_label REGEXP '-2[0-9]{2}\$'"); - my $reuse_xref_sth = $self->core->dbc->prepare("SELECT xref_id FROM xref x WHERE external_db_id = ? AND display_label = ? AND description = ? AND info_type = 'MISC'"); - my $del_ox_sth = $self->core->dbc->prepare("DELETE ox FROM object_xref ox LEFT JOIN xref x ON x.xref_id = ox.xref_id WHERE isnull(x.xref_id)"); - my $ins_xref_sth = $self->core->dbc->prepare("INSERT IGNORE into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values(?, ?, ?, ?, 0, ?, 'MISC', ?)"); - my $ins_ox_sth = $self->core->dbc->prepare("INSERT into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id) values(?, ?, 'Transcript', ?)"); - my $update_tran_sth = $self->core->dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? 
WHERE t.transcript_id=?"); + my $xref_id_sth = $core_dbi->prepare("SELECT max(xref_id) FROM xref"); + my $ox_id_sth = $core_dbi->prepare("SELECT max(object_xref_id) FROM object_xref"); + my $del_xref_sth = $core_dbi->prepare("DELETE x FROM xref x, object_xref ox WHERE x.xref_id = ox.xref_id AND ensembl_object_type = 'Transcript' AND display_label REGEXP '-2[0-9]{2}\$'"); + my $reuse_xref_sth = $core_dbi->prepare("SELECT xref_id FROM xref x WHERE external_db_id = ? AND display_label = ? AND description = ? AND info_type = 'MISC'"); + my $del_ox_sth = $core_dbi->prepare("DELETE ox FROM object_xref ox LEFT JOIN xref x ON x.xref_id = ox.xref_id WHERE isnull(x.xref_id)"); + my $ins_xref_sth = $core_dbi->prepare("INSERT IGNORE into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values(?, ?, ?, ?, 0, ?, 'MISC', ?)"); + my $ins_ox_sth = $core_dbi->prepare("INSERT into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id) values(?, ?, 'Transcript', ?)"); + my $update_tran_sth = $core_dbi->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?"); - my $get_genes = $self->core->dbc->prepare("SELECT g.gene_id, e.db_name, x.dbprimary_acc, x.display_label, x.description FROM gene g, xref x, external_db e where g.display_xref_id = x.xref_id and e.external_db_id = x.external_db_id"); - my $get_transcripts = $self->core->dbc->prepare("SELECT transcript_id FROM transcript WHERE gene_id = ? ORDER BY seq_region_start, seq_region_end"); - my $get_source_id = $self->core->dbc->prepare("SELECT external_db_id FROM external_db WHERE db_name like ?"); + my $get_genes = $core_dbi->prepare("SELECT g.gene_id, e.db_name, x.dbprimary_acc, x.display_label, x.description FROM gene g, xref x, external_db e where g.display_xref_id = x.xref_id and e.external_db_id = x.external_db_id"); + my $get_transcripts = $core_dbi->prepare("SELECT transcript_id FROM transcript WHERE gene_id = ? 
ORDER BY seq_region_start, seq_region_end"); + my $get_source_id = $core_dbi->prepare("SELECT external_db_id FROM external_db WHERE db_name like ?"); $get_genes->execute(); my ($gene_id, $external_db, $external_db_id, $acc, $label, $description, $transcript_id, $xref_id, $ox_id, $ext, $reuse_xref_id, $info_text); @@ -701,11 +707,13 @@ sub set_gene_descriptions{ my $self = shift; my $only_those_not_set = shift || 0; my $sql; + my $core_dbi = $self->core->dbc; + my $xref_dbi = $self->xref->dbc; - my $update_gene_desc_sth = $self->core->dbc->prepare("UPDATE gene SET description = ? where gene_id = ?"); + my $update_gene_desc_sth = $core_dbi->prepare("UPDATE gene SET description = ? where gene_id = ?"); if(!$only_those_not_set){ - my $reset_sth = $self->core->dbc->prepare("UPDATE gene SET description = null"); + my $reset_sth = $core_dbi->prepare("UPDATE gene SET description = null"); $reset_sth->execute(); $reset_sth->finish; } @@ -714,7 +722,7 @@ sub set_gene_descriptions{ if($only_those_not_set){ print "Only setting those not already set\n"; $sql = "select internal_id from gene_stable_id where desc_set = 1"; - my $sql_sth = $self->xref->dbc->prepare($sql); + my $sql_sth = $xref_dbi->prepare($sql); $sql_sth->execute; my $id; $sql_sth->bind_columns(\$id); @@ -730,7 +738,7 @@ sub set_gene_descriptions{ my %name_to_external_name; $sql = "select external_db_id, db_name, db_display_name from external_db"; - my $sth = $self->core->dbc->prepare($sql); + my $sth = $core_dbi->prepare($sql); $sth->execute(); my ($id, $name, $display_name); $sth->bind_columns(\$id, \$name, \$display_name); @@ -767,9 +775,9 @@ sub set_gene_descriptions{ @regexps = $self->gene_description_filter_regexps(); } - my $ins_p_sth = $self->xref->dbc->prepare("INSERT into gene_desc_priority (source_id, priority) values(?, ?)"); - my $get_source_id_sth = $self->xref->dbc->prepare("select source_id from source where name like ?"); - my $list_sources_sth = $self->xref->dbc->prepare("select distinct name 
from gene_desc_priority d join source using(source_id) order by d.priority"); + my $ins_p_sth = $xref_dbi->prepare("INSERT ignore into gene_desc_priority (source_id, priority) values(?, ?)"); + my $get_source_id_sth = $xref_dbi->prepare("select source_id from source where name like ?"); + my $list_sources_sth = $xref_dbi->prepare("select distinct name from gene_desc_priority d join source using(source_id) order by d.priority"); # The lower the priority number the better then my $i=0; @@ -837,7 +845,7 @@ DXS ######################################################################## - my $gene_sth = $self->core->dbc->prepare("select g.description from gene g where g.gene_id = ?"); + my $gene_sth = $core_dbi->prepare("select g.description from gene g where g.gene_id = ?"); my %no_source_name_in_desc; if( $self->mapper->can("no_source_label_list") ){ @@ -848,7 +856,7 @@ DXS } } - my $gene_desc_sth = $self->xref->dbc->prepare($gene_desc_sql); + my $gene_desc_sth = $xref_dbi->prepare($gene_desc_sql); $gene_desc_sth->execute(); my ($gene_id, $desc,$source_id,$label); diff --git a/misc-scripts/xref_mapping/XrefMapper/OfficialNaming.pm b/misc-scripts/xref_mapping/XrefMapper/OfficialNaming.pm index cfb5e42b67..65227c7439 100644 --- a/misc-scripts/xref_mapping/XrefMapper/OfficialNaming.pm +++ b/misc-scripts/xref_mapping/XrefMapper/OfficialNaming.pm @@ -114,8 +114,10 @@ sub get_official_name { ################################################## sub run { my $self = shift; + my $species_id = shift; my $dbname = $self->get_official_name(); + my $dbi = $self->xref->dbc; ########################################################### # If no offical name then we do not want to go any further @@ -125,7 +127,8 @@ sub run { $self->update_process_status("official_naming_done"); return; } - $self->species_id($self->get_id_from_species_name($self->core->species)); + $species_id = $self->get_id_from_species_name($self->core->species) unless defined $species_id; + $self->species_id($species_id); 
########################################################### @@ -157,30 +160,30 @@ sub run { ###################################################### # Get the current max values for xref and object_xref ###################################################### - my ($max_object_xref_id, $max_xref_id) = $self->find_max_ids(); + my ($max_object_xref_id, $max_xref_id) = $self->find_max_ids($dbi); my %display_label_to_desc; - $self->get_display_label_data(\%display_label_to_desc); + $self->get_display_label_data(\%display_label_to_desc, $dbi); my %synonym; - $self->get_synonyms(\%synonym); + $self->get_synonyms(\%synonym, $dbi); # get the officail naming external_sources - my $dbname_to_source_id = $self->get_new_dbname_sources(); # reference to hash + my $dbname_to_source_id = $self->get_new_dbname_sources($dbi); # reference to hash ########################### # Delete the old ones. ########################### - $self->delete_old_data($dbname_to_source_id); + $self->delete_old_data($dbname_to_source_id, $dbi); # # Different to to the other names above as not all will be new ones. 
# For vega tran entries we only want to delete the new ones, those having 'MISC' as info_type; # - $self->clean_up_vega_tran_entries($dbname_to_source_id->{"Clone_based_vega_transcript"}); + $self->clean_up_vega_tran_entries($dbname_to_source_id->{"Clone_based_vega_transcript"}, $dbi); - $self->reset_display_xrefs(); + $self->reset_display_xrefs($dbi); my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-dbconn => $self->core->dbc); my $ga = $db->get_GeneAdaptor(); @@ -197,7 +200,7 @@ SELECT gtt.gene_id, gtt.transcript_id, gsi.stable_id, tsi.stable_id ORDER BY gsi.stable_id, tsi.stable_id SQ0 - my $sth = $self->xref->dbc->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute; my ($gene_id, $tran_id, $gsi, $tsi); @@ -212,11 +215,11 @@ SQ0 $tran_id_to_stable_id{$tran_id} = $tsi; } - my $dbentrie_sth = $self->get_dbentrie_sth(); - my $ins_xref_sth = $self->get_ins_xref_sth(); - my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth(); - my $ins_object_xref_sth = $self->get_ins_object_xref_sth(); - my $set_gene_display_xref_sth = $self->get_set_gene_display_xref_sth(); + my $dbentrie_sth = $self->get_dbentrie_sth($dbi); + my $ins_xref_sth = $self->get_ins_xref_sth($dbi); + my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth($dbi); + my $ins_object_xref_sth = $self->get_ins_object_xref_sth($dbi); + my $set_gene_display_xref_sth = $self->get_set_gene_display_xref_sth($dbi); my %xref_added; # store those added $xref_added{$accession:$source_id} = $xref_id; my %seen_gene; @@ -240,7 +243,7 @@ SQ0 IEG my %ignore_object; - my $ignore_sth = $self->xref->dbc->prepare($ignore_sql); + my $ignore_sth = $dbi->prepare($ignore_sql); $ignore_sth->execute(); my ($ignore_object_xref_id); $ignore_sth->bind_columns(\$ignore_object_xref_id); @@ -269,7 +272,8 @@ IEG label_to_desc => \%display_label_to_desc, synomnym => \%synonym, xref_added => \%xref_added, - cbvt => $dbname_to_source_id->{"Clone_based_vega_transcript"} + cbvt => $dbname_to_source_id->{"Clone_based_vega_transcript"}, + dbi => $dbi, }); 
if (!defined($ens_clone_genes{$gene_id})) { #we're processing this gene for the first time @@ -282,7 +286,8 @@ IEG gene_to_tran => \%gene_to_transcripts, tran_to_vega_name => $tran_to_vega_name, gene_id_to_stable_id => \%gene_id_to_stable_id, - official_name_used => \%official_name_used + official_name_used => \%official_name_used, + dbi => $dbi }); if (defined($gene_symbol_xref_id)) { @@ -293,7 +298,7 @@ IEG # If not found see if there is an LRG entry ############################################ if(!defined($gene_symbol)){ # look for LRG - ($gene_symbol, $gene_symbol_xref_id, $is_lrg) = $self->find_lrg_hgnc($gene_id); + ($gene_symbol, $gene_symbol_xref_id, $is_lrg) = $self->find_lrg_hgnc($gene_id, $dbi); } #################################################### @@ -305,6 +310,7 @@ IEG $self->find_from_other_sources(\%ignore_object, {gene_id => $gene_id, label_to_desc => \%display_label_to_desc, + dbi => $dbi, tran_source => \$tran_source}); } @@ -350,7 +356,7 @@ IEG my $desc = $display_label_to_desc{$gene_symbol}; if(!defined($gene_symbol_xref_id)){ - $gene_symbol_xref_id = $self->get_xref_id_for_gene_symbol($gene_symbol, $dbname); + $gene_symbol_xref_id = $self->get_xref_id_for_gene_symbol($gene_symbol, $dbname, $dbi); if(!defined($gene_symbol_xref_id)){ carp "BLOOMING NORA could not find $gene_symbol in $dbname\n"; next; @@ -365,6 +371,7 @@ IEG gene_id_to_stable_id => \%gene_id_to_stable_id, gene_symbol => $gene_symbol, desc => $desc, + dbi => $dbi, source_id => $dbname_to_source_id->{$tran_source."_trans_name"}, xref_added => \%xref_added, seen_gene => \%seen_gene, @@ -385,6 +392,7 @@ IEG max_xref => \$max_xref_id, max_object => \$max_object_xref_id, xref_added => \%xref_added, + dbi => $dbi, gene_to_tran => \%gene_to_transcripts, tran_to_vega_ext => $tran_to_vega_ext, ens_clone_genes => \%ens_clone_genes, @@ -423,6 +431,7 @@ sub get_official_domain_name{ my $tran_to_vega_name = $arg_ref->{tran_to_vega_name}; my $gene_to_transcripts = $arg_ref->{gene_to_tran}; my 
$official_name_used = $arg_ref->{official_name_used}; + my $dbi = $arg_ref->{dbi}; my $dbname = $self->get_official_name(); @@ -430,7 +439,7 @@ sub get_official_domain_name{ my $gene_symbol_xref_id = undef; - my $dbentrie_sth = $self->get_dbentrie_sth(); + my $dbentrie_sth = $self->get_dbentrie_sth($dbi); my %ODN=(); my %xref_id_to_display; @@ -462,7 +471,7 @@ sub get_official_domain_name{ if(($count > 1) and (scalar(keys %ODN) == 1)){ # found one that is "best" so set it and remove others print "For gene ".$gene_id_to_stable_id->{$gene_id}." we have mutiple ".$dbname."'s\n"; - ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display); + ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display, $dbi); if(defined($gene_symbol)){ return $gene_symbol, $gene_symbol_xref_id; } @@ -474,7 +483,7 @@ sub get_official_domain_name{ if(scalar(keys %ODN) > 1){ #if we have more than 1 xref, fail xrefs with worse % identity if we can (query or target identity whichever is greater) - my $identity_sth = $self->get_best_identity_sth(); + my $identity_sth = $self->get_best_identity_sth($dbi); $identity_sth->execute($dbname, $gene_id, "Gene"); my ($xref_id, $best_identity); $identity_sth->bind_columns(\$xref_id, \$best_identity); @@ -537,7 +546,7 @@ sub get_official_domain_name{ %ODN = %best_ids; print "For gene ".$gene_id_to_stable_id->{$gene_id}." 
we have mutiple ".$dbname."'s\n"; #set statuses for xrefs with worse % identity to MULTI_DELETE - ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display); + ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display, $dbi); if( defined($gene_symbol) && scalar(keys %ODN == 1) ){ return $gene_symbol, $gene_symbol_xref_id; } @@ -602,12 +611,13 @@ sub set_transcript_and_gene_display_xref_via_clone_name{ my $vega_clone_name = $arg_ref->{vega_clone_name}; my $clone_name = $arg_ref->{clone_name}; my $ens_clone_names = $arg_ref->{ens_clone_genes}; + my $dbi = $arg_ref->{dbi}; - my $ins_xref_sth = $self->get_ins_xref_sth(); - my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth(); - my $set_tran_display_xref_sth = $self->get_set_transcript_display_xref_sth(); - my $ins_object_xref_sth = $self->get_ins_object_xref_sth(); - my $set_gene_display_xref_sth = $self->get_set_gene_display_xref_sth(); + my $ins_xref_sth = $self->get_ins_xref_sth($dbi); + my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth($dbi); + my $set_tran_display_xref_sth = $self->get_set_transcript_display_xref_sth($dbi); + my $ins_object_xref_sth = $self->get_ins_object_xref_sth($dbi); + my $set_gene_display_xref_sth = $self->get_set_gene_display_xref_sth($dbi); my $keep_gene; @@ -719,13 +729,14 @@ sub set_transcript_display_xrefs{ my $tran_to_vega_ext = $arg_ref->{tran_to_vega_ext}; my $tran_source = $arg_ref->{tran_source}; my $gene_id_to_stable_id = $arg_ref->{gene_id_to_stable_id}; + my $dbi = $arg_ref->{dbi}; # statement handles needed - my $ins_xref_sth = $self->get_ins_xref_sth(); - my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth(); - my $set_tran_display_xref_sth = $self->get_set_transcript_display_xref_sth(); - my $ins_object_xref_sth = $self->get_ins_object_xref_sth(); + my $ins_xref_sth = $self->get_ins_xref_sth($dbi); + my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth($dbi); + my 
$set_tran_display_xref_sth = $self->get_set_transcript_display_xref_sth($dbi); + my $ins_object_xref_sth = $self->get_ins_object_xref_sth($dbi); if ($gene_id_to_stable_id->{$gene_id} =~ /LRG/) { return; } @@ -769,10 +780,10 @@ sub set_transcript_display_xrefs{ ################################################# sub get_dbentrie_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_dbentrie_sth'})){ - my $sql =(<<"SQ1"); + my $sql =(<<"SQ1"); SELECT x.label, x.xref_id, ox.object_xref_id, s.priority FROM xref x, object_xref ox, source s WHERE x.xref_id = ox.xref_id AND @@ -782,9 +793,8 @@ SELECT x.label, x.xref_id, ox.object_xref_id, s.priority ox.ensembl_id = ? AND ox.ensembl_object_type = ? SQ1 - $self->{'_dbentrie_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_dbentrie_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } ################################################# @@ -794,10 +804,10 @@ SQ1 ################################################# sub get_dbentrie_with_desc_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_dbentrie_desc_sth'})){ - my $sql =(<<"SQD"); + my $sql =(<<"SQD"); SELECT x.label, x.xref_id, ox.object_xref_id, s.priority, x.description FROM xref x, object_xref ox, source s WHERE x.xref_id = ox.xref_id AND @@ -807,9 +817,8 @@ SELECT x.label, x.xref_id, ox.object_xref_id, s.priority, x.description ox.ensembl_id = ? AND ox.ensembl_object_type = ? 
SQD - $self->{'_dbentrie_desc_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_dbentrie_desc_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } ################################################# @@ -818,9 +827,9 @@ SQD ################################################# sub get_best_identity_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_best_identity_sth'})){ - my $sql =(<<"SQD"); + my $sql =(<<"SQD"); SELECT x.xref_id, CASE WHEN ix.query_identity >= ix.target_identity THEN ix.query_identity ELSE ix.target_identity END as best_identity FROM xref x, object_xref ox, identity_xref ix, source s @@ -829,9 +838,8 @@ WHERE x.xref_id = ox.xref_id AND x.source_id = s.source_id AND ox.ox_status = 'DUMP_OUT' AND ox.ensembl_id = ? AND ox.ensembl_object_type = ? order by best_identity DESC SQD - $self->{'_best_identity_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_best_identity_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } @@ -842,10 +850,9 @@ SQD ################################################# sub get_set_transcript_display_xref_sth { my $self = shift; - if(!defined($self->{'_set_tran_display'})){ - $self->{'_set_tran_display'} = $self->xref->dbc->prepare('UPDATE transcript_stable_id SET display_xref_id =? where internal_id = ?'); - } - return $self->{'_set_tran_display'} + my $dbi = shift; + my $sth = $dbi->prepare('UPDATE transcript_stable_id SET display_xref_id =? where internal_id = ?'); + return $sth; } @@ -856,10 +863,9 @@ sub get_set_transcript_display_xref_sth { ################################################# sub get_set_gene_display_xref_sth { my $self = shift; - if(!defined($self->{'_set_gene_display'})){ - $self->{'_set_gene_display'} = $self->xref->dbc->prepare('UPDATE gene_stable_id SET display_xref_id =? where internal_id = ?'); - } - return $self->{'_set_gene_display'} + my $dbi = shift; + my $sth = $dbi->prepare('UPDATE gene_stable_id SET display_xref_id =? 
where internal_id = ?'); + return $sth; } @@ -868,13 +874,11 @@ sub get_set_gene_display_xref_sth { ############################################### sub get_ins_xref_sth{ my $self= shift; + my $dbi = shift; - if(!defined($self->{'_ins_xref_sth'})){ - my $sql = "insert ignore into xref (xref_id, source_id, accession, label, version, species_id, info_type, info_text, description) values (?, ?, ?, ?, 0, ".$self->species_id.", 'MISC', ?, ? )"; - print "_ins_xref sql is:-\n$sql\n"; - $self->{'_ins_xref_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_ins_xref_sth'}; + my $sql = "insert ignore into xref (xref_id, source_id, accession, label, version, species_id, info_type, info_text, description) values (?, ?, ?, ?, 0, ".$self->species_id.", 'MISC', ?, ? )"; + my $sth = $dbi->prepare($sql); + return $sth; } @@ -883,12 +887,11 @@ sub get_ins_xref_sth{ ################################################# sub get_ins_dep_ix_sth{ my $self= shift; + my $dbi = shift; - if(!defined($self->{'_ins_identity_sth'})){ - my $sql = "insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"; - $self->{'_ins_identity_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_ins_identity_sth'}; + my $sql = "insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"; + my $sth = $dbi->prepare($sql); + return $sth; } ############################################### @@ -896,34 +899,34 @@ sub get_ins_dep_ix_sth{ ############################################### sub get_ins_object_xref_sth{ my $self= shift; + my $dbi = shift; - if(!defined($self->{'_ins_object_xref_sth'})){ - my $sql = "insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, linkage_type, ox_status, unused_priority) values (?, ?, ?, ?, 'MISC', 'DUMP_OUT', ?)"; - $self->{'_ins_object_xref_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_ins_object_xref_sth'}; + my $sql = "insert into object_xref 
(object_xref_id, ensembl_id, ensembl_object_type, xref_id, linkage_type, ox_status, unused_priority) values (?, ?, ?, ?, 'MISC', 'DUMP_OUT', ?)"; + my $sth = $dbi->prepare($sql); + return $sth; } sub find_max_ids{ my $self = shift; + my $dbi = shift; my ($max_object_xref_id, $max_object_xref_id2, $max_xref_id); - my $sth = $self->xref->dbc->prepare("SELECT MAX(object_xref_id) FROM object_xref"); + my $sth = $dbi->prepare("SELECT MAX(object_xref_id) FROM object_xref"); $sth->execute(); $sth->bind_columns(\$max_object_xref_id); $sth->fetch; - $sth = $self->xref->dbc->prepare("SELECT MAX(object_xref_id) FROM identity_xref"); + $sth = $dbi->prepare("SELECT MAX(object_xref_id) FROM identity_xref"); $sth->execute(); $sth->bind_columns(\$max_object_xref_id2); $sth->fetch; - $sth = $self->xref->dbc->prepare("SELECT MAX(xref_id) FROM xref"); + $sth = $dbi->prepare("SELECT MAX(xref_id) FROM xref"); $sth->execute(); $sth->bind_columns(\$max_xref_id); $sth->fetch; @@ -943,6 +946,7 @@ sub get_tran_to_vega_data{ my $ref_synonym = $arg_ref->{synonym}; my $ref_xref_added = $arg_ref->{xref_added}; my $clone_based_vega_transcript_id = $arg_ref->{cbvt}; + my $dbi = $arg_ref->{dbi}; my %tran_to_vega_ext; my %tran_to_vega_name; @@ -950,7 +954,7 @@ sub get_tran_to_vega_data{ my $dbname = $self->get_official_name(); - my $dbentrie_sth = $self->get_dbentrie_sth(); + my $dbentrie_sth = $self->get_dbentrie_sth($dbi); my ($display, $xref_id, $object_xref_id, $level); foreach my $tran_id ( @{$ref_gene_to_transcripts->{$gene_id}} ){ @@ -1013,7 +1017,7 @@ sub get_tran_to_vega_data{ sub get_synonyms{ - my ($self, $synonym) = @_; + my ($self, $synonym, $dbi) = @_; my $dbname = $self->get_official_name(); @@ -1025,7 +1029,7 @@ SELECT es.synonym, x.label s.name = '$dbname' SYN - my $sth = $self->xref->dbc->prepare($syn_sql); + my $sth = $dbi->prepare($syn_sql); $sth->execute(); my ($syn, $name); $sth->bind_columns(\$syn,\$name); @@ -1038,7 +1042,7 @@ SYN sub get_display_label_data{ # my ($self, 
$label_to_id, $label_to_desc) = @_; - my ($self, $label_to_desc) = @_; + my ($self, $label_to_desc, $dbi) = @_; my $dbname = $self->get_official_name(); @@ -1050,7 +1054,7 @@ SELECT x.accession, sy.synonym, x.description so.name like '$dbname' GD1 - my $gd1_sth = $self->xref->dbc->prepare($gd1_sql); + my $gd1_sth = $dbi->prepare($gd1_sql); $gd1_sth->execute(); my ($display_label, $acc, $syn, $desc); @@ -1071,7 +1075,7 @@ SELECT x.accession, x.label, x.description s.name like '$dbname' GD2 - my $gd2_sth = $self->xref->dbc->prepare($gd2_sql); + my $gd2_sth = $dbi->prepare($gd2_sql); $gd2_sth->execute(); $gd2_sth->bind_columns(\$acc,\$display_label, \$desc); @@ -1106,10 +1110,11 @@ sub find_from_other_sources{ my $tran_source = $ref_args->{tran_source}; my $gene_id = $ref_args->{gene_id}; my $display_label_to_desc = $ref_args->{label_to_desc}; + my $dbi = $ref_args->{dbi}; my %ignore_object = %{$ignore_object}; my ($gene_symbol, $gene_symbol_xref_id); - my $dbentrie_sth = $self->get_dbentrie_with_desc_sth(); + my $dbentrie_sth = $self->get_dbentrie_with_desc_sth($dbi); my $other_name_num = $self->get_other_name_hash(); my ($display, $xref_id, $object_xref_id, $level, $desc); @@ -1153,18 +1158,16 @@ sub find_from_other_sources{ # sub get_delete_odn_sth{ - my ($self) = @_; + my ($self, $dbi) = @_; - if(!defined($self->{_delete_odn_sth})){ - $self->{_delete_odn_sth} = $self->xref->dbc->prepare('UPDATE object_xref SET ox_status = "MULTI_DELETE" where object_xref_id = ?'); - } - return $self->{_delete_odn_sth}; + my $sth = $dbi->prepare('UPDATE object_xref SET ox_status = "MULTI_DELETE" where object_xref_id = ?'); + return $sth; } sub set_the_best_odns{ - my ($self, $odn, $ref_list, $ref_list_ox, $ref_xref_id_to_display) = @_; + my ($self, $odn, $ref_list, $ref_list_ox, $ref_xref_id_to_display, $dbi) = @_; - my $delete_odn_sth = $self->get_delete_odn_sth(); + my $delete_odn_sth = $self->get_delete_odn_sth($dbi); my %ODN = %$odn; my $gene_symbol = undef; @@ -1194,10 +1197,9 
@@ sub set_the_best_odns{ # Get the vega xref_id for a symbol # sub get_find_odn_sth{ - my ($self, $dbname) = @_; + my ($self, $dbname, $dbi) = @_; - if(!defined($self->{'_find_odn_sth'})){ - my $sql=(<<"SQ5"); + my $sql=(<<"SQ5"); SELECT x.xref_id FROM xref x, source s, object_xref ox WHERE ox.xref_id = x.xref_id AND @@ -1207,18 +1209,15 @@ SELECT x.xref_id s.priority_description like "vega" AND ox.ox_status ="DUMP_OUT" SQ5 - print $sql."\n"; - - $self->{'_find_odn_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_find_odn_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } sub get_xref_id_for_gene_symbol{ - my ($self, $gene_symbol, $dbname) = @_; + my ($self, $gene_symbol, $dbname, $dbi) = @_; my $xref_id = undef; - my $find_odn_sth = $self->get_find_odn_sth($dbname); + my $find_odn_sth = $self->get_find_odn_sth($dbname, $dbi); $find_odn_sth->execute($gene_symbol); $find_odn_sth->bind_columns(\$xref_id); @@ -1234,9 +1233,9 @@ sub get_xref_id_for_gene_symbol{ sub get_lrg_find_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_lrg_find_sth'})){ - my $sql=(<<'SQ2'); + my $sql=(<<'SQ2'); SELECT x.label, x.xref_id, ox.object_xref_id, s.priority FROM xref x, object_xref ox, source s WHERE x.xref_id = ox.xref_id AND @@ -1245,27 +1244,24 @@ SELECT x.label, x.xref_id, ox.object_xref_id, s.priority ox.ensembl_id = ? AND ox.ensembl_object_type = ? 
SQ2 - $self->{'_lrg_find_sth'} =$self->xref->dbc->prepare($sql); - } - return $self->{'_lrg_find_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } sub get_lrg_set_status_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_lrg_set_status_sth'})){ - $self->{'_lrg_set_status_sth'} = - $self->xref->dbc->prepare("update object_xref set ox_status = 'NO_DISPLAY' where object_xref_id = ?"); - } - return $self->{'_lrg_set_status_sth'}; + my $sth = $dbi->prepare("update object_xref set ox_status = 'NO_DISPLAY' where object_xref_id = ?"); + return $sth; } sub get_lrg_to_hgnc_sth{ my $self = shift; + my $dbi = shift; - if(!defined($self->{'_lrg_to_hgnc_sth'})){ - my $sql=(<<'SQ4'); + my $sql=(<<'SQ4'); SELECT x.xref_id, s.priority FROM xref x,source s, object_xref ox WHERE x.xref_id = ox.xref_id AND @@ -1275,21 +1271,20 @@ SELECT x.xref_id, s.priority ox.ox_status = 'DUMP_OUT' ORDER BY s.priority SQ4 - $self->{'_lrg_to_hgnc_sth'} = $self->xref->dbc->prepare($sql); - } - return $self->{'_lrg_to_hgnc_sth'}; + my $sth = $dbi->prepare($sql); + return $sth; } sub find_lrg_hgnc{ - my ($self, $gene_id) =@_; + my ($self, $gene_id, $dbi) =@_; my $gene_symbol; my $gene_symbol_xref_id; my $is_lrg = 0; - my $lrg_find_sth = $self->get_lrg_find_sth(); - my $lrg_set_status_sth = $self->get_lrg_set_status_sth(); - my $lrg_to_hgnc_sth = $self->get_lrg_to_hgnc_sth(); + my $lrg_find_sth = $self->get_lrg_find_sth($dbi); + my $lrg_set_status_sth = $self->get_lrg_set_status_sth($dbi); + my $lrg_to_hgnc_sth = $self->get_lrg_to_hgnc_sth($dbi); # look for LRG_HGNC_notransfer, if found then find HGNC equiv and set to this # print "LRG FOUND with no HGNC, should have gotten this via the alt allele table?? 
gene_id = $gene_id\n"; @@ -1369,6 +1364,7 @@ sub get_clone_name{ # sub get_new_dbname_sources{ my $self = shift; + my $dbi = shift; my %dbname_to_source_id; @@ -1388,7 +1384,7 @@ EntrezGene_trans_name); push @list, $dbname."_trans_name"; push @list, $dbname; - my $sth = $self->xref->dbc->prepare("select source_id from source where name like ?"); + my $sth = $dbi->prepare("select source_id from source where name like ?"); my $source_error = 0; foreach my $source (@list){ @@ -1414,7 +1410,7 @@ EntrezGene_trans_name); # my $list = "$odn_tran_id, $clone_based_vega_gene_id, $clone_based_ensembl_gene_id, $clone_based_ensembl_tran_id, $rfam_tran_id, $rfam_gene_id, $mirbase_tran_id, $mirbase_gene_id"; sub delete_old_data{ - my ($self, $dbname_to_source_id) = @_; + my ($self, $dbname_to_source_id, $dbi) = @_; my $dbname = $self->get_official_name(); @@ -1443,7 +1439,7 @@ DELETE s x.source_id in ( $list ); DE1 - my $sth = $self->xref->dbc->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); @@ -1454,7 +1450,7 @@ DELETE i x.xref_id = o.xref_id AND x.source_id in ( $list ) DE2 - $sth = $self->xref->dbc->prepare($del_identity_sql); + $sth = $dbi->prepare($del_identity_sql); $sth->execute(); my $del_ox_sql = (<<"DE3"); @@ -1463,18 +1459,18 @@ DELETE o WHERE x.xref_id = o.xref_id AND x.source_id in ( $list ) DE3 - $sth = $self->xref->dbc->prepare($del_ox_sql); + $sth = $dbi->prepare($del_ox_sql); $sth->execute(); my $del_x_sql = "delete x from xref x where x.source_id in ( $list )"; - $sth = $self->xref->dbc->prepare($del_x_sql); + $sth = $dbi->prepare($del_x_sql); $sth->execute(); return; } sub clean_up_vega_tran_entries{ - my ($self, $clone_based_vega_tran_id) = @_; + my ($self, $clone_based_vega_tran_id, $dbi) = @_; my $del_synonym_sql = (<<"CUV1"); # original ones added have info type of "DIRECT" DELETE s @@ -1485,7 +1481,7 @@ DELETE s CUV1 - my $sth = $self->xref->dbc->prepare($del_synonym_sql); + my $sth = $dbi->prepare($del_synonym_sql); $sth->execute(); my 
$del_x_sql = (<<"CUV2"); # original ones added have info type of "DIRECT" @@ -1495,7 +1491,7 @@ DELETE x x.info_type = 'MISC' CUV2 - $sth = $self->xref->dbc->prepare($del_x_sql); + $sth = $dbi->prepare($del_x_sql); $sth->execute(); return; } @@ -1503,8 +1499,9 @@ CUV2 sub reset_display_xrefs{ my $self = shift; + my $dbi = shift; - my $sth = $self->xref->dbc->prepare("update transcript_stable_id set display_xref_id = null"); + my $sth = $dbi->prepare("update transcript_stable_id set display_xref_id = null"); $sth->execute; $sth = $self->xref->dbc->prepare("UPDATE gene_stable_id SET display_xref_id = null, desc_set =0"); diff --git a/misc-scripts/xref_mapping/XrefMapper/ProcessMappings.pm b/misc-scripts/xref_mapping/XrefMapper/ProcessMappings.pm index ca738ddc4b..13b0b14036 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ProcessMappings.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ProcessMappings.pm @@ -71,9 +71,11 @@ sub process_mappings { my %query_cutoff; my %target_cutoff; my ($job_id, $percent_query_cutoff, $percent_target_cutoff); - my $sth = $self->xref->dbc->prepare("select job_id, percent_query_cutoff, percent_target_cutoff from mapping"); + my $dbi = $self->xref->dbc(); + my $sth = $dbi->prepare("select job_id, percent_query_cutoff, percent_target_cutoff from mapping"); $sth->execute(); $sth->bind_columns(\$job_id, \$percent_query_cutoff, \$percent_target_cutoff); + my $object_xref_id; while($sth->fetch){ $query_cutoff{$job_id} = $percent_query_cutoff; @@ -83,7 +85,7 @@ sub process_mappings { my ($root_dir, $map, $status, $out, $err, $array_number); my ($map_file, $out_file, $err_file); - my $map_sth = $self->xref->dbc->prepare("select root_dir, map_file, status, out_file, err_file, array_number, job_id from mapping_jobs"); + my $map_sth = $dbi->prepare("select root_dir, map_file, status, out_file, err_file, array_number, job_id from mapping_jobs"); $map_sth->execute(); $map_sth->bind_columns(\$root_dir, \$map, \$status, \$out, \$err, \$array_number, 
\$job_id); my $already_processed_count = 0; @@ -91,7 +93,7 @@ sub process_mappings { my $error_count = 0; my $empty_count = 0; - my $stat_sth = $self->xref->dbc->prepare("update mapping_jobs set status = ? where job_id = ? and array_number = ?"); + my $stat_sth = $dbi->prepare("update mapping_jobs set status = ? where job_id = ? and array_number = ?"); while($map_sth->fetch()){ my $err_file = $root_dir."/".$err; @@ -119,7 +121,7 @@ sub process_mappings { } else{ #err file checks out so process the mapping file. if(-e $map_file){ - my $count = $self->process_map_file($map_file, $query_cutoff{$job_id}, $target_cutoff{$job_id}, $job_id, $array_number); + my $count = $self->process_map_file($map_file, $query_cutoff{$job_id}, $target_cutoff{$job_id}, $job_id, $array_number, $dbi); if( $count > 0){ $processed_count++; $stat_sth->execute('SUCCESS',$job_id, $array_number); @@ -149,7 +151,7 @@ sub process_mappings { print "already processed = $already_processed_count, processed = $processed_count, errors = $error_count, empty = $empty_count\n" if($self->verbose); if(!$error_count){ - my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('mapping_processed',now())"); + my $sth = $dbi->prepare("insert into process_status (status, date) values('mapping_processed',now())"); $sth->execute(); $sth->finish; } @@ -160,13 +162,13 @@ sub process_mappings { #return number of lines parsed if succesfull. 
-1 for fail sub process_map_file{ - my ($self, $map_file, $query_cutoff, $target_cutoff, $job_id, $array_number) = @_; + my ($self, $map_file, $query_cutoff, $target_cutoff, $job_id, $array_number, $dbi) = @_; my $ret = 1; my $ensembl_type = "Translation"; - if($map_file =~ /_dna_/){ + if($map_file =~ /dna_/){ $ensembl_type = "Transcript"; } @@ -179,14 +181,14 @@ sub process_map_file{ my $total_lines = 0; my $root_dir = $self->core->dir; - my $ins_go_sth = $self->xref->dbc->prepare("insert ignore into go_xref (object_xref_id, linkage_type, source_xref_id) values(?,?,?)"); - my $dep_sth = $self->xref->dbc->prepare("select dependent_xref_id, linkage_annotation from dependent_xref where master_xref_id = ?"); - my $start_sth = $self->xref->dbc->prepare("update mapping_jobs set object_xref_start = ? where job_id = ? and array_number = ?"); - my $end_sth = $self->xref->dbc->prepare("update mapping_jobs set object_xref_end = ? where job_id = ? and array_number = ?"); -# my $update_dependent_xref_sth = $self->xref->dbc->prepare("update dependent_xref set object_xref_id = ? where master_xref_id = ? and dependent_xref_id =?"); + my $ins_go_sth = $dbi->prepare("insert ignore into go_xref (object_xref_id, linkage_type, source_xref_id) values(?,?,?)"); + my $dep_sth = $dbi->prepare("select dependent_xref_id, linkage_annotation from dependent_xref where master_xref_id = ?"); + my $start_sth = $dbi->prepare("update mapping_jobs set object_xref_start = ? where job_id = ? and array_number = ?"); + my $end_sth = $dbi->prepare("update mapping_jobs set object_xref_end = ? where job_id = ? and array_number = ?"); +# my $update_dependent_xref_sth = $dbi->prepare("update dependent_xref set object_xref_id = ? where master_xref_id = ? 
and dependent_xref_id =?"); my $object_xref_id; - my $sth = $self->xref->dbc->prepare("select max(object_xref_id) from object_xref"); + my $sth = $dbi->prepare("select max(object_xref_id) from object_xref"); $sth->execute(); $sth->bind_columns(\$object_xref_id); $sth->fetch(); @@ -196,19 +198,21 @@ sub process_map_file{ } - my $object_xref_sth = $self->xref->dbc->prepare("insert into object_xref (object_xref_id, ensembl_id,ensembl_object_type, xref_id, linkage_type, ox_status ) values (?, ?, ?, ?, ?, ?)"); - my $object_xref_sth2 = $self->xref->dbc->prepare("insert into object_xref (object_xref_id, ensembl_id,ensembl_object_type, xref_id, linkage_type, ox_status, master_xref_id ) values (?, ?, ?, ?, ?, ?, ?)"); + my $object_xref_sth = $dbi->prepare("insert into object_xref (ensembl_id,ensembl_object_type, xref_id, linkage_type, ox_status ) values (?, ?, ?, ?, ?)"); + my $object_xref_sth2 = $dbi->prepare("insert into object_xref (ensembl_id,ensembl_object_type, xref_id, linkage_type, ox_status, master_xref_id ) values (?, ?, ?, ?, ?, ?)"); + my $get_object_xref_id_sth = $dbi->prepare("select object_xref_id from object_xref where ensembl_id = ? and ensembl_object_type = ? and xref_id = ? and linkage_type = ? and ox_status = ?"); + my $get_object_xref_id_master_sth = $dbi->prepare("select object_xref_id from object_xref where ensembl_id = ? and ensembl_object_type = ? and xref_id = ? and linkage_type = ? and ox_status = ? 
and master_xref_id = ?"); local $object_xref_sth->{RaiseError}; #catch duplicates local $object_xref_sth->{PrintError}; # cut down on error messages local $object_xref_sth2->{RaiseError}; #catch duplicates local $object_xref_sth2->{PrintError}; # cut down on error messages - my $identity_xref_sth = $self->xref->dbc->prepare("insert ignore into identity_xref (object_xref_id, query_identity, target_identity, hit_start, hit_end, translation_start, translation_end, cigar_line, score ) values (?, ?, ?, ?, ?, ?, ?, ?, ?)"); + my $identity_xref_sth = $dbi->prepare("insert ignore into identity_xref (object_xref_id, query_identity, target_identity, hit_start, hit_end, translation_start, translation_end, cigar_line, score ) values (?, ?, ?, ?, ?, ?, ?, ?, ?)"); - my $ins_dep_ix_sth = $self->xref->dbc->prepare("insert ignore into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"); + my $ins_dep_ix_sth = $dbi->prepare("insert ignore into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"); - my $source_name_sth = $self->xref->dbc->prepare("select s.name from xref x join source s using(source_id) where x.xref_id = ?"); - my $biotype_sth = $self->xref->dbc->prepare("select biotype from transcript_stable_id where internal_id = ?"); + my $source_name_sth = $dbi->prepare("select s.name from xref x join source s using(source_id) where x.xref_id = ?"); + my $biotype_sth = $dbi->prepare("select biotype from transcript_stable_id where internal_id = ?"); my $last_query_id = 0; my $best_match_found = 0; @@ -300,8 +304,9 @@ sub process_map_file{ $status = "FAILED_CUTOFF"; } - $object_xref_id++; - $object_xref_sth->execute($object_xref_id, $target_id, $ensembl_type, $query_id, 'SEQUENCE_MATCH', $status) ; + $object_xref_sth->execute($target_id, $ensembl_type, $query_id, 'SEQUENCE_MATCH', $status) ; + $get_object_xref_id_sth->execute($target_id, $ensembl_type, $query_id, 'SEQUENCE_MATCH', $status); + $object_xref_id = 
($get_object_xref_id_sth->fetchrow_array())[0]; if($object_xref_sth->err){ my $err = $object_xref_sth->errstr; if($err =~ /Duplicate/){ @@ -338,8 +343,9 @@ sub process_map_file{ $dep_sth->execute($master_xref_id); $dep_sth->bind_columns(\$dep_xref_id, \$link); while($dep_sth->fetch){ - $object_xref_id++; - $object_xref_sth2->execute($object_xref_id, $target_id, $ensembl_type, $dep_xref_id, 'DEPENDENT', $status, $master_xref_id); + $object_xref_sth2->execute($target_id, $ensembl_type, $dep_xref_id, 'DEPENDENT', $status, $master_xref_id); + $get_object_xref_id_master_sth->execute($target_id, $ensembl_type, $dep_xref_id, 'DEPENDENT', $status, $master_xref_id); + $object_xref_id = ($get_object_xref_id_master_sth->fetchrow_array())[0]; if($object_xref_sth2->err){ my $err = $object_xref_sth->errstr; if($err =~ /Duplicate/){ diff --git a/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm b/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm index 8cc60f3fd2..0f64579558 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm @@ -53,32 +53,26 @@ sub process{ print "Process Pairs\n" if($self->verbose); - - my $sth = $self->xref->dbc->prepare("select MAX(object_xref_id) from object_xref"); - $sth->execute; - my ($object_xref_id) = $sth->fetchrow_array(); - $object_xref_id++; - $sth->finish; - - print "Starting at object_xref of $object_xref_id\n" if($self->verbose); - + my $dbi = $self->xref->dbc; + my $object_xref_id; #this query gives us transcript RefSeq_mRNA% object xrefs, and the paired RefSeq_peptide% accession as well as the translation id for the transcript - my $transcr_obj_xrefs_sth = $self->xref->dbc->prepare("select gtt.translation_id, p.source_id, p.accession1, ix.query_identity, ix.target_identity from object_xref ox join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_mRNA%') join pairs p on 
(x.accession = p.accession2) join gene_transcript_translation gtt on (gtt.transcript_id = ox.ensembl_id) join identity_xref ix using(object_xref_id)"); + my $transcr_obj_xrefs_sth = $dbi->prepare("select gtt.translation_id, p.source_id, p.accession1, ix.query_identity, ix.target_identity from object_xref ox join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_mRNA%') join pairs p on (x.accession = p.accession2) join gene_transcript_translation gtt on (gtt.transcript_id = ox.ensembl_id) join identity_xref ix using(object_xref_id)"); #this query is used to check if and object_xref exists for the related translation and paired RefSeq_peptide% with a status of 'DUMP_OUT' - my $ox_translation_sth = $self->xref->dbc->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY') and ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?"); + my $ox_translation_sth = $dbi->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY') and ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?"); - my $ox_insert_sth = $self->xref->dbc->prepare("insert into object_xref (object_xref_id, xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')"); + my $ox_insert_sth = $dbi->prepare("insert into object_xref (xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')"); + my $get_object_xref_id_sth = $dbi->prepare("select object_xref_id from object_xref where xref_id = ? and ensembl_id = ? and ensembl_object_type = ? 
and linkage_type = 'INFERRED_PAIR' and ox_status = 'DUMP_OUT'"); - my $xref_sth = $self->xref->dbc->prepare("select xref_id from xref where accession = ? and source_id = ?"); + my $xref_sth = $dbi->prepare("select xref_id from xref where accession = ? and source_id = ?"); - my $xref_update_sth = $self->xref->dbc->prepare("update xref set info_type = 'INFERRED_PAIR' where xref_id = ?"); - my $identity_update_sth = $self->xref->dbc->prepare("insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"); + my $xref_update_sth = $dbi->prepare("update xref set info_type = 'INFERRED_PAIR' where xref_id = ?"); + my $identity_update_sth = $dbi->prepare("insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)"); - my $transl_object_xrefs_sth = $self->xref->dbc->prepare("select ox.object_xref_id, ox.ensembl_id, x.accession, gtt.transcript_id from gene_transcript_translation gtt join object_xref ox on (gtt.translation_id = ox.ensembl_id and ox.ensembl_object_type = 'Translation') join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT' and ox.ensembl_object_type = 'Translation') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_peptide%')"); + my $transl_object_xrefs_sth = $dbi->prepare("select ox.object_xref_id, ox.ensembl_id, x.accession, gtt.transcript_id from gene_transcript_translation gtt join object_xref ox on (gtt.translation_id = ox.ensembl_id and ox.ensembl_object_type = 'Translation') join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT' and ox.ensembl_object_type = 'Translation') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_peptide%')"); - my $ox_mark_delete_sth = $self->xref->dbc->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?"); + my $ox_mark_delete_sth = $dbi->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?"); $transcr_obj_xrefs_sth->execute(); @@ -106,7 
+100,9 @@ sub process{ if (!$xref_id) { die("Xref not found for accession $pep_accession source_id $pep_source_id"); } - $ox_insert_sth->execute($object_xref_id, $xref_id, $translation_id, "Translation") || die "Could not insert object xref $object_xref_id: xref_id $xref_id, translation_id $translation_id" ; + $ox_insert_sth->execute($xref_id, $translation_id, "Translation") || die "Could not insert object xref $object_xref_id: xref_id $xref_id, translation_id $translation_id" ; + $get_object_xref_id_sth->execute($xref_id, $translation_id, 'Translation'); + $object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0]; $xref_update_sth->execute($xref_id)|| die "Could not update xref_id $xref_id"; if ($query_identity && $target_identity) { @@ -114,7 +110,6 @@ sub process{ } $change{'translation object xrefs added'}++; - $object_xref_id++; $transl_object_xref_id = $object_xref_id; } @@ -151,7 +146,7 @@ sub process{ #change the status to 'MULTI_DELETE' $ox_mark_delete_sth->execute($translation_object_xref_id) || die("Failed to update status to 'MULTI_DELETE for object_xref_id $translation_object_xref_id"); # Process all dependent xrefs as well - $self->process_dependents($translation_object_xref_id, $translation_id, $transcript_id); + $self->process_dependents($translation_object_xref_id, $translation_id, $transcript_id, $dbi); $change{'translation object xrefs removed'}++; } @@ -167,17 +162,17 @@ sub process{ } #update process status - my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('processed_pairs',now())"); + my $sth_stat = $dbi->prepare("insert into process_status (status, date) values('processed_pairs',now())"); $sth_stat->execute(); $sth_stat->finish; } sub process_dependents { - my ($self, $translation_object_xref_id, $translation_id, $transcript_id) = @_; + my ($self, $translation_object_xref_id, $translation_id, $transcript_id, $dbi) = @_; - my $dep_tl_sth = $self->xref->dbc->prepare("select distinct 
dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? and dependent_ox.ensembl_object_type = 'Translation' and dependent_ox.ox_status = 'DUMP_OUT' "); - my $dep_tr_sth = $self->xref->dbc->prepare("select distinct dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? and dependent_ox.ensembl_object_type = 'Transcript' and dependent_ox.ox_status = 'DUMP_OUT' "); - my $ox_dx_delete_sth = $self->xref->dbc->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?"); + my $dep_tl_sth = $dbi->prepare("select distinct dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? 
and dependent_ox.ensembl_object_type = 'Translation' and dependent_ox.ox_status = 'DUMP_OUT' "); + my $dep_tr_sth = $dbi->prepare("select distinct dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? and dependent_ox.ensembl_object_type = 'Transcript' and dependent_ox.ox_status = 'DUMP_OUT' "); + my $ox_dx_delete_sth = $dbi->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?"); my @master_object_xrefs; my $new_master_object_xref_id; diff --git a/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm b/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm index f0a49f5ca2..fed75c4b98 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm @@ -56,10 +56,10 @@ sub new { } sub get_priority_names{ - my ($self) = @_; + my ($self, $dbi) = @_; - my $psth = $self->xref->dbc->prepare("select s.priority_description, s.name from source s, xref x where x.source_id = s.source_id group by s.priority_description, s.name order by s.name") || die "prepare failed"; + my $psth = $dbi->prepare("select s.priority_description, s.name from source s, xref x where x.source_id = s.source_id group by s.priority_description, s.name order by s.name") || die "prepare failed"; $psth->execute() || die "execute failed"; my @names; @@ -83,15 +83,16 @@ sub get_priority_names{ sub process { my ($self) = @_; - my @names = $self->get_priority_names(); + my $dbi = $self->xref->dbc; + my @names = $self->get_priority_names($dbi); print "The following will be processed as priority xrefs\n" if($self->verbose); foreach my $name (@names){ print 
"\t$name\n" if($self->verbose); } - my $update_ox_sth = $self->xref->dbc->prepare('update object_xref set ox_status = "FAILED_PRIORITY" where object_xref_id = ?'); - my $update_x_sth = $self->xref->dbc->prepare("update xref set dumped = 'NO_DUMP_ANOTHER_PRIORITY' where xref_id = ?"); + my $update_ox_sth = $dbi->prepare('update object_xref set ox_status = "FAILED_PRIORITY" where object_xref_id = ?'); + my $update_x_sth = $dbi->prepare("update xref set dumped = 'NO_DUMP_ANOTHER_PRIORITY' where xref_id = ?"); # 1) Set to failed all those that have no object xrefs. @@ -104,7 +105,7 @@ sub process { AND ox.object_xref_id is null FSQL - my $f_sth = $self->xref->dbc->prepare($f_sql); + my $f_sth = $dbi->prepare($f_sql); foreach my $name (@names){ $f_sth->execute($name); my ($xref_id); @@ -129,7 +130,7 @@ FSQL AND s.name = ? ORDER BY x.accession DESC, s.priority ASC , identity DESC, x.xref_id DESC NEWS - my $new_sth = $self->xref->dbc->prepare($new_sql); + my $new_sth = $dbi->prepare($new_sql); # # Query to copy identity_xref values from one xref to another # This is to keep alignment information event if alignment was not the best match @@ -140,7 +141,7 @@ NEWS WHERE object_xref_id = ?; IDXCP - my $idx_copy_sth = $self->xref->dbc->prepare($idx_copy_sql); + my $idx_copy_sth = $dbi->prepare($idx_copy_sql); # # Query to copy synonyms from one xref to another @@ -151,15 +152,15 @@ IDXCP WHERE xref_id = ?); SYNCP - my $syn_copy_sth = $self->xref->dbc->prepare($syn_copy_sql); + my $syn_copy_sth = $dbi->prepare($syn_copy_sql); - my $best_ox_sth = $self->xref->dbc->prepare("SELECT object_xref_id FROM object_xref WHERE xref_id = ? and ensembl_object_type = ? and ensembl_id = ?"); + my $best_ox_sth = $dbi->prepare("SELECT object_xref_id FROM object_xref WHERE xref_id = ? and ensembl_object_type = ? 
and ensembl_id = ?"); my $seq_score_sql = (<<SEQCP); SELECT query_identity, target_identity, hit_start, hit_end, translation_start, translation_end, cigar_line, score, evalue FROM identity_xref WHERE object_xref_id = ? SEQCP - my $seq_score_sth = $self->xref->dbc->prepare($seq_score_sql); + my $seq_score_sth = $dbi->prepare($seq_score_sql); foreach my $name (@names){ @@ -200,7 +201,7 @@ SEQCP $update_x_sth->execute($xref_id); # Copy synonyms across if they are missing $syn_copy_sth->execute($best_xref_id, $xref_id); - $self->process_dependents($xref_id, $best_xref_id); + $self->process_dependents($xref_id, $best_xref_id, $dbi); } } else{ # not DUMP_OUT @@ -251,23 +252,23 @@ SEQCP $idx_copy_sth->finish; $syn_copy_sth->finish; - my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('prioritys_flagged',now())"); + my $sth = $dbi->prepare("insert into process_status (status, date) values('prioritys_flagged',now())"); $sth->execute(); $sth->finish; } sub process_dependents{ # master xref IDs are entries for the current accession via various methods. We take dependent xrefs from the old and add to the new - my ($self, $old_master_xref_id, $new_master_xref_id) = @_; + my ($self, $old_master_xref_id, $new_master_xref_id, $dbi) = @_; - my $matching_ens_sth = $self->xref->dbc->prepare("select distinct ensembl_object_type, ensembl_id from object_xref where ox_status not in ('FAILED_CUTOFF') and xref_id = ? order by ensembl_object_type"); - my $dep_sth = $self->xref->dbc->prepare("select distinct dx.dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id, ox.ensembl_object_type from dependent_xref dx, object_xref ox where ox.xref_id = dx.dependent_xref_id and ox.master_xref_id = dx.master_xref_id and dx.master_xref_id = ? 
order by ox.ensembl_object_type"); - my $insert_dep_x_sth = $self->xref->dbc->prepare("insert into dependent_xref(master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) values(?, ?, ?, ?)"); - my $insert_dep_ox_sth = $self->xref->dbc->prepare("insert ignore into object_xref(master_xref_id, ensembl_object_type, ensembl_id, linkage_type, ox_status, xref_id) values(?, ?, ?, 'DEPENDENT', 'DUMP_OUT', ?)"); - my $dep_ox_sth = $self->xref->dbc->prepare("select object_xref_id from object_xref where master_xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and linkage_type = 'DEPENDENT' AND ox_status = 'DUMP_OUT' and xref_id = ?"); - my $insert_dep_go_sth = $self->xref->dbc->prepare("insert ignore into go_xref values(?, ?, ?)"); - my $insert_ix_sth = $self->xref->dbc->prepare("insert ignore into identity_xref(object_xref_id, query_identity, target_identity) values(?, 100, 100)"); + my $matching_ens_sth = $dbi->prepare("select distinct ensembl_object_type, ensembl_id from object_xref where ox_status not in ('FAILED_CUTOFF') and xref_id = ? order by ensembl_object_type"); + my $dep_sth = $dbi->prepare("select distinct dx.dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id, ox.ensembl_object_type from dependent_xref dx, object_xref ox where ox.xref_id = dx.dependent_xref_id and ox.master_xref_id = dx.master_xref_id and dx.master_xref_id = ? order by ox.ensembl_object_type"); + my $insert_dep_x_sth = $dbi->prepare("insert into dependent_xref(master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) values(?, ?, ?, ?)"); + my $insert_dep_ox_sth = $dbi->prepare("insert ignore into object_xref(master_xref_id, ensembl_object_type, ensembl_id, linkage_type, ox_status, xref_id) values(?, ?, ?, 'DEPENDENT', 'DUMP_OUT', ?)"); + my $dep_ox_sth = $dbi->prepare("select object_xref_id from object_xref where master_xref_id = ? and ensembl_object_type = ? and ensembl_id = ? 
and linkage_type = 'DEPENDENT' AND ox_status = 'DUMP_OUT' and xref_id = ?"); + my $insert_dep_go_sth = $dbi->prepare("insert ignore into go_xref values(?, ?, ?)"); + my $insert_ix_sth = $dbi->prepare("insert ignore into identity_xref(object_xref_id, query_identity, target_identity) values(?, 100, 100)"); my @master_xrefs = ($old_master_xref_id); my $recursive = 0; @@ -306,7 +307,7 @@ sub process_dependents{ # Remove all mappings to low priority xrefs # Then delete any leftover identity or go xrefs of it foreach my $ensembl_id (@{ $old_ensembl_ids{$object_type}} ) { - $self->_detach_object_xref($xref_id, $dep_xref_id, $object_type, $ensembl_id); + $self->_detach_object_xref($xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi); } # Duplicate each dependent for the new master xref if it is the first in the chain @@ -349,9 +350,9 @@ sub process_dependents{ # Set unimportant object_xrefs to FAILED_PRIORITY, and delete all those that remain sub _detach_object_xref { my $self = shift; - my ($xref_id, $dep_xref_id, $object_type, $ensembl_id) = @_; + my ($xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi) = @_; # Drop all the identity and go xrefs for the dependents of an xref - my $remove_dep_ox_sth = $self->xref->dbc->prepare( + my $remove_dep_ox_sth = $dbi->prepare( "DELETE ix, g FROM object_xref ox \ LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id \ LEFT JOIN go_xref g ON g.object_xref_id = ox.object_xref_id \ @@ -359,12 +360,12 @@ sub _detach_object_xref { ); # Fail the object_xrefs that did link to the deleted identity/go xrefs. # This only updates one of potentially many, due to table contraints. - my $update_dep_ox_sth = $self->xref->dbc->prepare( + my $update_dep_ox_sth = $dbi->prepare( "UPDATE IGNORE object_xref SET ox_status = 'FAILED_PRIORITY' \ WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?" ); # This deletes everything left behind by the previous query. 
- my $clean_dep_ox_sth = $self->xref->dbc->prepare( + my $clean_dep_ox_sth = $dbi->prepare( "DELETE FROM object_xref \ WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?" ); diff --git a/misc-scripts/xref_mapping/XrefMapper/TestMappings.pm b/misc-scripts/xref_mapping/XrefMapper/TestMappings.pm index 1d59472f04..bfdc7e9850 100644 --- a/misc-scripts/xref_mapping/XrefMapper/TestMappings.pm +++ b/misc-scripts/xref_mapping/XrefMapper/TestMappings.pm @@ -97,14 +97,15 @@ sub unlinked_entries{ my $xref_id; my $count; + my $dbi = $self->xref->dbc; - my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_started',now())"); + my $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_started',now())"); $sth_stat->execute(); # dependent_xref and xref my $count_sql = "select count(1) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null"; my $sql = "select distinct(d.master_xref_id) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null limit 10"; - my $sth = $self->xref->dbc->prepare($count_sql); + my $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -112,7 +113,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -129,7 +130,7 @@ sub unlinked_entries{ $sql = "select distinct(d.dependent_xref_id) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -137,7 +138,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = 
$dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -151,7 +152,7 @@ sub unlinked_entries{ $sql = "select distinct(d.xref_id) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -159,7 +160,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -174,7 +175,7 @@ sub unlinked_entries{ $sql = "select distinct(d.general_xref_id) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -182,7 +183,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -199,7 +200,7 @@ sub unlinked_entries{ $sql = "select distinct(d.xref_id) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -207,7 +208,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -222,7 +223,7 @@ sub unlinked_entries{ $sql = "select distinct(d.object_xref_id) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10"; - $sth = 
$self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -230,7 +231,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -244,7 +245,7 @@ sub unlinked_entries{ $sql = "select distinct(d.object_xref_id) from go_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -252,7 +253,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -268,7 +269,7 @@ sub unlinked_entries{ $sql = "select distinct(d.".$type."_id) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -276,7 +277,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -292,7 +293,7 @@ sub unlinked_entries{ $count_sql = "select count(1) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and ox_status in ('DUMP_OUT') and g.object_xref_id is null"; $sql = "select distinct(o.object_xref_id) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where 
x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and ox_status in ('DUMP_OUT') and g.object_xref_id is null limit 10"; - $sth = $self->xref->dbc->prepare($count_sql); + $sth = $dbi->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); @@ -300,7 +301,7 @@ sub unlinked_entries{ if($count){ $failed = 1; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; @@ -311,11 +312,11 @@ sub unlinked_entries{ } if(!$failed){ - $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_finished',now())"); + $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_finished',now())"); $sth_stat->execute(); } else{ - $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_failed',now())"); + $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_failed',now())"); $sth_stat->execute(); } $sth_stat->finish; @@ -332,8 +333,9 @@ sub entry_number_check{ my %old_object_xref_count; my %new_object_xref_count; + my $dbi = $self->xref->dbc; - my $sth = $self->xref->dbc->prepare('select s.name, count(distinct x.xref_id, ensembl_id) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name'); + my $sth = $dbi->prepare('select s.name, count(distinct x.xref_id, ensembl_id) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name'); $sth->execute(); my ($name, $count); $sth->bind_columns(\$name,\$count); @@ -380,6 +382,7 @@ sub name_change_check{ my %new_name; # $old_name{$gene_id} = HGNC_%name my %id_to_stable_id; + my $dbi = $self->xref->dbc; my $official_name = $self->mapper->get_official_name; 
if(!defined($official_name)){ @@ -389,7 +392,7 @@ sub name_change_check{ my $sql = 'select x.label, gsi.internal_id, gsi.stable_id from object_xref ox, xref x, gene_stable_id gsi, source s where x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and gsi.internal_id = ox.ensembl_id and x.source_id = s.source_id and s.name like "'.$official_name.'_%"'; - my $sth = $self->xref->dbc->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); my ($name, $gene_id, $stable_id); $sth->bind_columns(\$name,\$gene_id, \$stable_id); @@ -408,7 +411,7 @@ sub name_change_check{ # Use synonyms as well. my %alias; $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "'.$official_name.'_%" '; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); my ($syn); $sth->bind_columns(\$name,\$syn); @@ -419,7 +422,7 @@ sub name_change_check{ $sth->finish; $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "EntrezGene"'; - $sth = $self->xref->dbc->prepare($sql); + $sth = $dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$name,\$syn); while($sth->fetch()){ @@ -457,12 +460,13 @@ sub name_change_check{ sub direct_stable_id_check{ my ($self) = @_; + my $dbi = $self->xref->dbc; foreach my $type (qw(gene transcript translation)){ my $sql = "select s.name, count(*) from source s, xref x, ".$type."_direct_xref gdx left join ".$type."_stable_id gsi on gdx.ensembl_stable_id = gsi.stable_id where s.source_id = x.source_id and x.xref_id = gdx.general_xref_id and gsi.stable_id is null group by s.name"; - my $sth = $self->xref->dbc->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); my ($name, $count); $sth->bind_columns(\$name,\$count); diff --git a/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm 
b/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm index cb7ac8b6fe..377a0e2e28 100644 --- a/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm +++ b/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm @@ -57,6 +57,8 @@ sub update{ my $verbose = $self->mapper->verbose; + my $core_dbi = $self->core->dbc; + my $xref_dbi = $self->xref->dbc; ##################################### # first remove all the projections. # @@ -64,7 +66,7 @@ sub update{ print "Deleting all PROJECTIONs from this database\n" if $verbose; my $sql = "DELETE es FROM xref x, external_synonym es WHERE x.xref_id = es.xref_id and x.info_type = 'PROJECTION'"; - my $sth = $self->core->dbc->prepare($sql); + my $sth = $core_dbi->prepare($sql); my $affected_rows = $sth->execute(); print "\tDeleted $affected_rows PROJECTED external_synonym row(s)\n" if $verbose; @@ -73,17 +75,17 @@ DELETE ontology_xref FROM ontology_xref, object_xref, xref WHERE ontology_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.info_type = 'PROJECTION' SQL - $sth = $self->core->dbc->prepare($sql); + $sth = $core_dbi->prepare($sql); $affected_rows = $sth->execute(); print "\tDeleted $affected_rows PROJECTED ontology_xref row(s)\n" if $verbose; $sql = "DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.info_type = 'PROJECTION'"; - $sth = $self->core->dbc->prepare($sql); + $sth = $core_dbi->prepare($sql); $affected_rows = $sth->execute(); print "\tDeleted $affected_rows PROJECTED object_xref row(s)\n" if $verbose; $sql = "DELETE xref FROM xref WHERE xref.info_type = 'PROJECTION'"; - $sth = $self->core->dbc->prepare($sql); + $sth = $core_dbi->prepare($sql); $affected_rows = $sth->execute(); print "\tDeleted $affected_rows PROJECTED xref row(s)\n" if $verbose; @@ -95,7 +97,7 @@ SQL my %name_to_external_db_id; $sql = "select external_db_id, db_name from external_db"; - $sth = $self->core->dbc->prepare($sql); + $sth = $core_dbi->prepare($sql); 
$sth->execute(); my ($id, $name); $sth->bind_columns(\$id, \$name); @@ -107,7 +109,7 @@ SQL my %source_id_to_external_db_id; $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest - $sth = $self->xref->dbc->prepare($sql); + $sth = $xref_dbi->prepare($sql); $sth->execute(); $sth->bind_columns(\$id, \$name); while($sth->fetch()){ @@ -123,7 +125,7 @@ SQL $sth->finish; - $sth = $self->xref->dbc->prepare("update xref set dumped = null where dumped != 'NO_DUMP_ANOTHER_PRIORITY'"); # just incase this is being ran again + $sth = $xref_dbi->prepare("update xref set dumped = null where dumped != 'NO_DUMP_ANOTHER_PRIORITY'"); # just incase this is being ran again $sth->execute; $sth->finish; @@ -135,21 +137,21 @@ SQL # Delete the existing ones # ###################################### my ($count); - $sth = $self->xref->dbc->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id group by s.name'); + $sth = $xref_dbi->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id group by s.name'); $sth->execute(); $sth->bind_columns(\$name,\$count); - my $synonym_sth = $self->core->dbc->prepare('DELETE external_synonym FROM external_synonym, xref WHERE external_synonym.xref_id = xref.xref_id AND xref.external_db_id = ?'); - my $go_sth = $self->core->dbc->prepare('DELETE ontology_xref.* FROM ontology_xref, object_xref, xref WHERE ontology_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); - my $identity_sth = $self->core->dbc->prepare('DELETE identity_xref FROM identity_xref, object_xref, xref WHERE identity_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); - my $object_sth = $self->core->dbc->prepare('DELETE object_xref 
FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); - my $dependent_sth = $self->core->dbc->prepare('DELETE d FROM dependent_xref d, xref x WHERE d.dependent_xref_id = x.xref_id and x.external_db_id = ?'); - my $xref_sth = $self->core->dbc->prepare('DELETE FROM xref WHERE xref.external_db_id = ?'); - my $unmapped_sth = $self->core->dbc->prepare('DELETE FROM unmapped_object WHERE type="xref" and external_db_id = ?'); + my $synonym_sth = $core_dbi->prepare('DELETE external_synonym FROM external_synonym, xref WHERE external_synonym.xref_id = xref.xref_id AND xref.external_db_id = ?'); + my $go_sth = $core_dbi->prepare('DELETE ontology_xref.* FROM ontology_xref, object_xref, xref WHERE ontology_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); + my $identity_sth = $core_dbi->prepare('DELETE identity_xref FROM identity_xref, object_xref, xref WHERE identity_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); + my $object_sth = $core_dbi->prepare('DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?'); + my $dependent_sth = $core_dbi->prepare('DELETE d FROM dependent_xref d, xref x WHERE d.dependent_xref_id = x.xref_id and x.external_db_id = ?'); + my $xref_sth = $core_dbi->prepare('DELETE FROM xref WHERE xref.external_db_id = ?'); + my $unmapped_sth = $core_dbi->prepare('DELETE FROM unmapped_object WHERE type="xref" and external_db_id = ?'); - my $transaction_start_sth = $self->core->dbc->prepare('start transaction'); - my $transaction_end_sth = $self->core->dbc->prepare('commit'); + my $transaction_start_sth = $core_dbi->prepare('start transaction'); + my $transaction_end_sth = $core_dbi->prepare('commit'); # # ?? Is it faster to delete them all in one go with a external_db_id in (....) ??? 
@@ -201,7 +203,7 @@ SQL # Get the offsets for object_xref, xref # ########################################## - $sth = $self->core->dbc->prepare('select MAX(xref_id) from xref'); + $sth = $core_dbi->prepare('select MAX(xref_id) from xref'); my $xref_offset; $sth->execute; $sth->bind_columns(\$xref_offset); @@ -211,7 +213,7 @@ SQL $self->add_meta_pair("xref_offset", $xref_offset); - $sth = $self->core->dbc->prepare('select MAX(object_xref_id) from object_xref'); + $sth = $core_dbi->prepare('select MAX(object_xref_id) from object_xref'); my $object_xref_offset; $sth->execute; $sth->bind_columns(\$object_xref_offset); @@ -253,7 +255,7 @@ SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, x.info_type = ? order by x.xref_id DIRS - my $seq_sth = $self->xref->dbc->prepare($seq_sql); + my $seq_sth = $xref_dbi->prepare($seq_sql); ########################### @@ -271,9 +273,9 @@ SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, x.info_type = ? order by x.xref_id DIRS - my $dir_sth = $self->xref->dbc->prepare($dir_sql); + my $dir_sth = $xref_dbi->prepare($dir_sql); -# $dependent_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id'); +# $dependent_sth = $xref_dbi->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? 
order by x.xref_id, ox.ensembl_id'); my $dep_sql =(<<DSQL); SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, @@ -286,7 +288,7 @@ SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ORDER BY x.xref_id, ox.ensembl_id DSQL - $dependent_sth = $self->xref->dbc->prepare($dep_sql); + $dependent_sth = $xref_dbi->prepare($dep_sql); my $go_sql =(<<GSQL); @@ -302,7 +304,7 @@ DSQL order by x.xref_id, ox.ensembl_id GSQL - $go_sth = $self->xref->dbc->prepare($go_sql); + $go_sth = $xref_dbi->prepare($go_sql); my $go_count_sql = (<<GCNTSQL); SELECT count(*) @@ -316,13 +318,13 @@ GCNTSQL # SQL to add data to core ######################### - my $add_identity_xref_sth = $self->core->dbc->prepare('insert ignore into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'); - my $add_go_xref_sth = $self->core->dbc->prepare('insert ignore into ontology_xref (object_xref_id, source_xref_id, linkage_type) values (?, ?, ?)'); - my $add_dependent_xref_sth = $self->core->dbc->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)'); - my $add_syn_sth = $self->core->dbc->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)'); - my $add_release_info_sth = $self->core->dbc->prepare('update external_db set db_release = ? 
where external_db_id = ?'); + my $add_identity_xref_sth = $core_dbi->prepare('insert ignore into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'); + my $add_go_xref_sth = $core_dbi->prepare('insert ignore into ontology_xref (object_xref_id, source_xref_id, linkage_type) values (?, ?, ?)'); + my $add_dependent_xref_sth = $core_dbi->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)'); + my $add_syn_sth = $core_dbi->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)'); + my $add_release_info_sth = $core_dbi->prepare('update external_db set db_release = ? where external_db_id = ?'); - $sth = $self->xref->dbc->prepare('select s.name, s.source_id, count(*), x.info_type, s.priority_description, s.source_release from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" group by s.name, s.source_id, x.info_type'); + $sth = $xref_dbi->prepare('select s.name, s.source_id, count(*), x.info_type, s.priority_description, s.source_release from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" group by s.name, s.source_id, x.info_type'); $sth->execute(); my ($type, $source_id, $where_from, $release_info); $sth->bind_columns(\$name,\$source_id, \$count, \$type, \$where_from, \$release_info); @@ -342,7 +344,7 @@ GCNTSQL my @xref_list=(); # process at end. 
Add synonyms and set dumped = 1; - my $go_count_sth = $self->xref->dbc->prepare($go_count_sql); + my $go_count_sth = $xref_dbi->prepare($go_count_sql); $go_count_sth->execute($source_id, $type); my ($go_data_present) = $go_count_sth->fetchrow_array; $go_count_sth->finish; @@ -367,10 +369,10 @@ GCNTSQL if($last_xref != $xref_id){ push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi); $last_xref = $xref_id; } - $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}); + $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi); $add_go_xref_sth->execute( ($object_xref_id+$object_xref_offset), 0, $linkage_type); $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue) if $translation_start; @@ -389,10 +391,10 @@ GCNTSQL if($last_xref != $xref_id){ push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi); $last_xref = $xref_id; } - $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}); + $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, 
$core_dbi); $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue) if $translation_start; } @@ -414,10 +416,10 @@ GCNTSQL if($last_xref != $xref_id) { push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi); $last_xref = $xref_id; } - $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $checksum_analysis_id); + $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $checksum_analysis_id, $core_dbi); } print "CHECKSUM $count\n" if ($verbose); } @@ -437,10 +439,10 @@ GCNTSQL if($last_xref != $xref_id){ push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi); $last_xref = $xref_id; } - $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}); + $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi); if(defined($master_xref_id)){ # need to sort this out as all should habe one really. (interpro generates go without these!!) 
$add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) ); $add_go_xref_sth->execute( ($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), $linkage_type); @@ -467,10 +469,10 @@ GCNTSQL if($last_xref != $xref_id){ push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label || $acc, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label || $acc, $version, $desc, $type, $info || $where_from, $core_dbi); } if($last_xref != $xref_id or $last_ensembl != $ensembl_id){ - $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}); + $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi); if (defined($master_xref_id)){ # need to sort this out for FlyBase since there are EMBL direct entries from the GFF and dependent xrefs from Uniprot $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) ); } @@ -507,10 +509,10 @@ GCNTSQL if($last_xref != $xref_id){ push @xref_list, $xref_id; $count++; - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi); $last_xref = $xref_id; } - $object_xref_id = $self->add_object_xref ($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}); + $object_xref_id = $self->add_object_xref ($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), 
$analysis_ids{$ensembl_type}, $core_dbi); $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue); } @@ -525,7 +527,7 @@ GCNTSQL if(@xref_list){ my $syn_count = 0; my $syn_sql = "select xref_id, synonym from synonym where xref_id in(".join(", ",@xref_list).")"; - my $syn_sth = $self->xref->dbc->prepare($syn_sql); + my $syn_sth = $xref_dbi->prepare($syn_sql); $syn_sth->execute(); my ($xref_id, $syn); @@ -537,7 +539,7 @@ GCNTSQL $syn_sth->finish; print "\tadded $syn_count synonyms\n" if($syn_count); - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'MAPPED' where xref_id in (".join(", ",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'MAPPED' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute() || die "Could not set dumped status"; $xref_dumped_sth->finish; } @@ -563,7 +565,7 @@ GCNTSQL my %reason_id; # Get the cutoff values - $sth = $self->xref->dbc->prepare("select distinct s.name, m.percent_query_cutoff, m.percent_target_cutoff from source s, source_mapping_method sm, mapping m where sm.source_id = s.source_id and sm.method = m.method"); + $sth = $xref_dbi->prepare("select distinct s.name, m.percent_query_cutoff, m.percent_target_cutoff from source s, source_mapping_method sm, mapping m where sm.source_id = s.source_id and sm.method = m.method"); $sth->execute(); my ($source_name, $q_cut, $t_cut); $sth->bind_columns(\$source_name, \$q_cut, \$t_cut); @@ -592,14 +594,14 @@ GCNTSQL foreach my $key (keys %desc_failed){ - $sth = $self->core->dbc->prepare("select unmapped_reason_id from unmapped_reason where full_description like '".$desc_failed{$key}."'"); + $sth = $core_dbi->prepare("select unmapped_reason_id from unmapped_reason where full_description like '".$desc_failed{$key}."'"); $sth->execute(); my $failed_id=undef; 
$sth->bind_columns(\$failed_id); $sth->fetch; $sth->finish; if(!defined($failed_id)){ - $sth = $self->core->dbc->prepare('insert into unmapped_reason (summary_description, full_description) values("'.$summary_failed{$key}.'", "'.$desc_failed{$key}.'")'); + $sth = $core_dbi->prepare('insert into unmapped_reason (summary_description, full_description) values("'.$summary_failed{$key}.'", "'.$desc_failed{$key}.'")'); $sth->execute(); $failed_id = $sth->{'mysql_insertid'}; $sth->finish @@ -624,19 +626,19 @@ GCNTSQL AND x.info_type = 'DIRECT' DIR - my $direct_unmapped_sth = $self->xref->dbc->prepare($sql); + my $direct_unmapped_sth = $xref_dbi->prepare($sql); my ($xref_id, $acc, $version, $label, $desc, $info); $direct_unmapped_sth->execute(); $direct_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname); - my $set_unmapped_sth = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id ) values ('xref', ?, ?, ?, ?)"); + my $set_unmapped_sth = $core_dbi->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id ) values ('xref', ?, ?, ?, ?)"); my @xref_list = (); my $analysis_id = $analysis_ids{'Transcript'}; # No real analysis here but in table it is set to not NULL while($direct_unmapped_sth->fetch()){ my $ex_id = $name_to_external_db_id{$dbname}; if(defined($name_to_external_db_id{$dbname})){ - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info, $core_dbi); $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $reason_id{"NO_STABLE_ID"}); push @xref_list, $xref_id; } @@ -646,7 +648,7 @@ DIR if(@xref_list){ - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'UNMAPPED_NO_STABLE_ID' where xref_id in (".join(", 
",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'UNMAPPED_NO_STABLE_ID' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute(); $xref_dumped_sth->finish; } @@ -663,7 +665,7 @@ DIR AND x.info_type = 'MISC' MIS - my $misc_unmapped_sth = $self->xref->dbc->prepare($sql); + my $misc_unmapped_sth = $xref_dbi->prepare($sql); $misc_unmapped_sth->execute(); $misc_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname); @@ -672,7 +674,7 @@ MIS while($misc_unmapped_sth->fetch()){ my $ex_id = $name_to_external_db_id{$dbname}; if(defined($name_to_external_db_id{$dbname})){ - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info, $core_dbi); $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $reason_id{"NO_MAPPING"}); push @xref_list, $xref_id; } @@ -682,7 +684,7 @@ MIS if(@xref_list){ - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'UNMAPPED_NO_MAPPING' where xref_id in (".join(", ",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'UNMAPPED_NO_MAPPING' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute(); $xref_dumped_sth->finish; } @@ -704,12 +706,12 @@ MIS ORDER BY s.name, x.accession DEP - my $dep_unmapped_sth = $self->xref->dbc->prepare($sql); + my $dep_unmapped_sth = $xref_dbi->prepare($sql); $dep_unmapped_sth->execute(); my $parent; $dep_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname, \$parent); - $set_unmapped_sth = $self->core->dbc->prepare("insert ignore into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, parent ) values ('xref', ?, ?, ?, '".$reason_id{"MASTER_FAILED"}."', ?)"); + $set_unmapped_sth = 
$core_dbi->prepare("insert ignore into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, parent ) values ('xref', ?, ?, ?, '".$reason_id{"MASTER_FAILED"}."', ?)"); @xref_list = (); my $last_acc= 0; @@ -719,7 +721,7 @@ DEP next; } if($last_acc ne $acc){ - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label||$acc, $version, $desc, 'UNMAPPED', $info); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label||$acc, $version, $desc, 'UNMAPPED', $info, $core_dbi); } $last_acc = $acc; $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $parent); @@ -730,7 +732,7 @@ DEP if(@xref_list){ - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'UNMAPPED_MASTER_FAILED' where xref_id in (".join(", ",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'UNMAPPED_MASTER_FAILED' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute(); $xref_dumped_sth->finish; } @@ -756,13 +758,13 @@ DEP SEQ # removed AND ox.ox_status != 'FAILED_PRIORITY' - my $seq_unmapped_sth = $self->xref->dbc->prepare($sql); + my $seq_unmapped_sth = $xref_dbi->prepare($sql); $seq_unmapped_sth->execute(); my ($ensembl_object_type, $ensembl_id, $q_id, $t_id, $seq_type, $status) ; $seq_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname, \$seq_type, \$ensembl_object_type, \$ensembl_id, \$q_id, \$t_id,\$status); - my $set_unmapped_no_sth = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, ensembl_object_type ) values ('xref', ?, ?, ?, '".$reason_id{"FAILED_MAP"}."', ?)"); - my $set_unmapped_failed_sth = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, query_score, target_score, ensembl_id, ensembl_object_type ) values ('xref', ?, ?, ?, ?,?,?,?,?)"); + my $set_unmapped_no_sth = 
$core_dbi->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, ensembl_object_type ) values ('xref', ?, ?, ?, '".$reason_id{"FAILED_MAP"}."', ?)"); + my $set_unmapped_failed_sth = $core_dbi->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, query_score, target_score, ensembl_id, ensembl_object_type ) values ('xref', ?, ?, ?, ?,?,?,?,?)"); @xref_list = (); @@ -774,7 +776,7 @@ SEQ next; } if($last_xref != $xref_id){ - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info, $core_dbi); } $last_xref = $xref_id; if(defined($ensembl_id)){ @@ -800,7 +802,7 @@ SEQ if(@xref_list){ - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'UNMAPPED_NO_MAPPING' where xref_id in (".join(", ",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'UNMAPPED_NO_MAPPING' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute(); $xref_dumped_sth->finish; } @@ -822,11 +824,11 @@ SEQ WEL - my $wel_unmapped_sth = $self->xref->dbc->prepare($sql); + my $wel_unmapped_sth = $xref_dbi->prepare($sql); $wel_unmapped_sth->execute(); $wel_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname); - $set_unmapped_sth = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id) values ('xref', ?, ?, ?, '".$reason_id{"NO_MASTER"}."')"); + $set_unmapped_sth = $core_dbi->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id) values ('xref', ?, ?, ?, '".$reason_id{"NO_MASTER"}."')"); $analysis_id = $analysis_ids{'Transcript'}; # No real analysis here but in table it is set to not NULL @xref_list = (); @@ -835,7 
+837,7 @@ WEL if(!defined($ex_id)){ next; } - $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info); + $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, 'UNMAPPED', $info, $core_dbi); $set_unmapped_sth->execute($analysis_id, $ex_id, $acc); push @xref_list, $xref_id; } @@ -843,7 +845,7 @@ WEL $set_unmapped_sth->finish; if(@xref_list){ - my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 'UNMAPPED_NO_MASTER' where xref_id in (".join(", ",@xref_list).")"); + my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'UNMAPPED_NO_MASTER' where xref_id in (".join(", ",@xref_list).")"); $xref_dumped_sth->execute(); $xref_dumped_sth->finish; } @@ -851,7 +853,7 @@ WEL $transaction_end_sth->execute(); - my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_loaded',now())"); + my $sth_stat = $xref_dbi->prepare("insert into process_status (status, date) values('core_loaded',now())"); $sth_stat->execute(); $sth_stat->finish; @@ -904,8 +906,7 @@ sub get_single_analysis { sub add_xref { - my ($self, $offset, $xref_id, $external_db_id, $dbprimary_acc, $display_label, $version, $description, $info_type, $info_text) = @_; - my $dbc = $self->core->dbc(); + my ($self, $offset, $xref_id, $external_db_id, $dbprimary_acc, $display_label, $version, $description, $info_type, $info_text, $dbc) = @_; my $select_sth = $dbc->prepare("select xref_id from xref where dbprimary_acc = ? and external_db_id = ? and info_type = ? and info_text = ? 
and version = ?"); my $insert_sth = $dbc->prepare("insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values (?, ?, ?, ?, ?, ?, ?, ?)"); my $new_xref_id; @@ -921,8 +922,7 @@ sub add_xref { } sub add_object_xref { - my ($self, $offset, $object_xref_id, $ensembl_id, $ensembl_object_type, $xref_id, $analysis_id) = @_; - my $dbc = $self->core->dbc(); + my ($self, $offset, $object_xref_id, $ensembl_id, $ensembl_object_type, $xref_id, $analysis_id, $dbc) = @_; my $select_sth = $dbc->prepare("select object_xref_id from object_xref where xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and analysis_id = ?"); my $insert_sth = $dbc->prepare("insert ignore into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, analysis_id) values (?, ?, ?, ?, ?)"); my $new_object_xref_id; diff --git a/misc-scripts/xref_mapping/XrefParser/ArrayExpressParser.pm b/misc-scripts/xref_mapping/XrefParser/ArrayExpressParser.pm index feefc4410c..ecd64bbcfe 100644 --- a/misc-scripts/xref_mapping/XrefParser/ArrayExpressParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/ArrayExpressParser.pm @@ -38,9 +38,12 @@ sub run_script { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; my $db = $ref_arg->{dba}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -73,7 +76,9 @@ sub run_script { $user = $1; } - my %species_id_to_names = $self->species_id2name(); + my %species_id_to_names = $self->species_id2name($dbi); + if (defined $species_name) { push @{$species_id_to_names{$species_id}}, $species_name; } + if (!defined $species_id_to_names{$species_id}) { next; } my $species_id_to_names = 
\%species_id_to_names; my $names = $species_id_to_names->{$species_id}; my $species_lookup = $self->_get_species($verbose); @@ -83,7 +88,7 @@ sub run_script { return; } - my $species_name = $species_id_to_names{$species_id}[0]; + $species_name = $species_id_to_names{$species_id}[0]; #get stable_ids from core and create xrefs @@ -140,9 +145,10 @@ sub run_script { label => $gene_stable_id, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); - $self->add_direct_xref( $xref_id, $gene_stable_id, 'gene', ''); + $self->add_direct_xref( $xref_id, $gene_stable_id, 'gene', '', $dbi); if ($xref_id) { $xref_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm index 56083c5473..1feaedf68e 100644 --- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm @@ -31,15 +31,6 @@ use Getopt::Long; my $base_dir = File::Spec->curdir(); -my $add_xref_sth = undef; -my $add_object_xref_sth = undef; -my $add_identity_xref_sth = undef; -my %add_direct_xref_sth; -my $add_dependent_xref_sth = undef; -my $get_xref_sth = undef; -my $get_object_xref_sth = undef; -my $add_synonym_sth = undef; - my %xref_dependent_mapped; @@ -139,7 +130,8 @@ sub get_filehandle # Returns source_id or -1 if not found ############################################# sub get_source_id_for_source_name { - my ($self, $source_name,$priority_desc) = @_; + my ($self, $source_name,$priority_desc, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; my $low_name = lc $source_name; my $sql = "SELECT source_id FROM source WHERE LOWER(name)='$low_name'"; @@ -148,7 +140,7 @@ sub get_source_id_for_source_name { $sql .= " AND LOWER(priority_description)='$low_name'"; $source_name .= " ($priority_desc)"; } - my $sth = $self->dbi->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); my @row = $sth->fetchrow_array(); my $source_id; @@ -176,12 +168,13 @@ sub 
get_source_id_for_source_name { ############################################################ sub get_source_ids_for_source_name_pattern { - my ($self, $source_name) = @_; + my ($self, $source_name, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; my $big_name = uc $source_name; my $sql = "SELECT source_id FROM source WHERE upper(name) LIKE '%${big_name}%'"; - my $sth = $self->dbi->prepare($sql); + my $sth = $dbi->prepare($sql); my @sources; $sth->execute(); while(my @row = $sth->fetchrow_array()){ @@ -198,11 +191,12 @@ sub get_source_ids_for_source_name_pattern { # From a source_id get the name ############################### sub get_source_name_for_source_id { - my ($self, $source_id) = @_; + my ($self, $source_id, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; my $source_name; my $sql = "SELECT name FROM source WHERE source_id= '$source_id'"; - my $sth = $self->dbi->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); my @row = $sth->fetchrow_array(); if (@row) { @@ -224,11 +218,10 @@ sub get_source_name_for_source_id { # to master_xref_id for all of source names given ##################################################### sub get_valid_xrefs_for_dependencies{ - my ($self, $dependent_name, @reverse_ordered_source_list) = @_; + my ($self, $dependent_name, $dbi, @reverse_ordered_source_list) = @_; + $dbi = $self->dbi unless defined $dbi; my %dependent_2_xref; - my $dbi = $self->dbi(); - my $sql = 'select source_id from source where LOWER(name) =?'; my $sth = $dbi->prepare($sql); @@ -275,11 +268,10 @@ DSS # to master_xref_id for all of source names given ##################################################### sub get_valid_xrefs_for_direct_xrefs{ - my ($self, $direct_name, $separator) = @_; + my ($self, $direct_name, $separator, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; my %direct_2_xref; - my $dbi = $self->dbi; - my $sql = 'select source_id from source where name like ?'; my $sth = $dbi->prepare($sql); @@ -326,8 +318,8 @@ GDS 
############################################# sub label_to_acc{ - my ($self,$source_name,$species_id) =@_; - my $dbi = $self->dbi; + my ($self,$source_name,$species_id, $dbi) =@_; + $dbi = $self->dbi unless defined $dbi; # First cache synonyms so we can quickly add them later my %synonyms; @@ -380,11 +372,11 @@ sub label_to_acc{ #################################################### sub get_valid_codes{ - my ($self,$source_name,$species_id) =@_; + my ($self,$source_name,$species_id, $dbi) =@_; my %valid_codes; my @sources; - my $dbi = $self->dbi(); + $dbi = $self->dbi unless defined $dbi; my $big_name = uc $source_name; my $sql = "select source_id from source where upper(name) like '%$big_name%'"; @@ -410,7 +402,7 @@ sub get_valid_codes{ # Upload xrefs to the database ############################## sub upload_xref_object_graphs { - my ($self, $rxrefs) = @_; + my ($self, $rxrefs, $dbi) = @_; my $count = scalar @{$rxrefs}; if($verbose) { @@ -431,7 +423,7 @@ sub upload_xref_object_graphs { ################################################################################# # Start of sql needed to add xrefs, primary_xrefs, synonym, dependent_xrefs etc.. ################################################################################# - my $dbi = $self->dbi; + $dbi = $self->dbi unless defined $dbi; my $xref_sth = $dbi->prepare('INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type) VALUES(?,?,?,?,?,?,?)'); my $pri_insert_sth = $dbi->prepare('INSERT INTO primary_xref VALUES(?,?,?,?)'); my $pri_update_sth = $dbi->prepare('UPDATE primary_xref SET sequence=? WHERE xref_id=?'); @@ -638,19 +630,20 @@ sub upload_xref_object_graphs { # $direct_xref is a reference to an array of hash objects. 
###################################################################################### sub upload_direct_xrefs{ - my ($self, $direct_xref) = @_; + my ($self, $direct_xref, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; for my $dr(@{$direct_xref}) { ################################################ # Find the xref_id for this accession and source ################################################ - my $general_xref_id = get_xref($dr->{ACCESSION},$dr->{SOURCE_ID},$dr->{SPECIES_ID}); + my $general_xref_id = $self->get_xref($dr->{ACCESSION},$dr->{SOURCE_ID},$dr->{SPECIES_ID}, $dbi); ####################################################### # If found add the direct xref else write error message ####################################################### if ($general_xref_id){ - $self->add_direct_xref($general_xref_id, $dr->{ENSEMBL_STABLE_ID},$dr->{ENSEMBL_TYPE},$dr->{LINKAGE_XREF}); + $self->add_direct_xref($general_xref_id, $dr->{ENSEMBL_STABLE_ID},$dr->{ENSEMBL_TYPE},$dr->{LINKAGE_XREF}, $dbi); } else{ print {*STDERR} 'Problem Could not find accession '.$dr->{ACCESSION}.' for source '.$dr->{SOURCE}. 
@@ -668,9 +661,10 @@ sub upload_direct_xrefs{ ############################################### sub add_meta_pair { - my ($self, $key, $value) = @_; + my ($self, $key, $value, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; - my $sth = $self->dbi->prepare('insert into meta (meta_key, meta_value, date) values("'.$key.'", "'.$value.'", now())'); + my $sth = $dbi->prepare('insert into meta (meta_key, meta_value, date) values("'.$key.'", "'.$value.'", now())'); $sth->execute; $sth->finish; return; @@ -684,9 +678,10 @@ sub add_meta_pair { sub get_xref_sources { my $self = shift; + my $dbi = shift; + $dbi = $self->dbi unless defined $dbi; my %sourcename_to_sourceid; - my $dbi = $self->dbi; my $sth = $dbi->prepare('SELECT name,source_id FROM source'); $sth->execute() or croak( $dbi->errstr() ); while(my @row = $sth->fetchrow_array()) { @@ -705,9 +700,10 @@ sub get_xref_sources { sub species_id2taxonomy { my $self = shift; + my $dbi = shift; + $dbi = $self->dbi unless defined $dbi; my %species_id2taxonomy; - my $dbi = $self->dbi; my $sth = $dbi->prepare('SELECT species_id, taxonomy_id FROM species'); $sth->execute() or croak( $dbi->errstr() ); @@ -731,10 +727,11 @@ sub species_id2taxonomy { ######################################################################### sub species_id2name { my $self = shift; + my $dbi = shift; + $dbi = $self->dbi unless defined $dbi; my %species_id2name; - my $dbi = $self->dbi; my $sth = $dbi->prepare('SELECT species_id, name FROM species'); $sth->execute() or croak( $dbi->errstr() ); while ( my @row = $sth->fetchrow_array() ) { @@ -771,8 +768,9 @@ sub get_xref_id { my $source = $arg_ref->{source_id} || croak 'Need an source_id for get_xref_id'; my $species = $arg_ref->{species_id} || confess 'Need an species_id for get_xref_id'; my $error = $arg_ref->{error}; + my $dbi = $arg_ref->{dbi}; - my $id = $self->get_xref($acc, $source, $species); + my $id = $self->get_xref($acc, $source, $species, $dbi); return $id; } @@ -783,11 +781,11 @@ sub 
get_xref_id { ################################################################## sub primary_xref_id_exists { - my ($self, $xref_id) = @_; + my ($self, $xref_id, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; my $exists = 0; - my $dbi = $self->dbi; my $sth = $dbi->prepare('SELECT xref_id FROM primary_xref WHERE xref_id=?'); $sth->execute($xref_id) or croak( $dbi->errstr() ); my @row = $sth->fetchrow_array(); @@ -802,10 +800,10 @@ sub primary_xref_id_exists { # Get the tax id for a particular species id ############################################ sub get_taxonomy_from_species_id{ - my ($self,$species_id) = @_; + my ($self,$species_id, $dbi) = @_; my %hash; - my $dbi= $self->dbi; + $dbi = $self->dbi unless defined $dbi; my $sth = $dbi->prepare("SELECT taxonomy_id FROM species WHERE species_id = $species_id"); $sth->execute() or croak( $dbi->errstr() ); while(my @row = $sth->fetchrow_array()) { @@ -821,11 +819,11 @@ sub get_taxonomy_from_species_id{ # Only used in GOParser at the moment ################################################ sub get_direct_xref{ - my ($self,$stable_id,$type,$link) = @_; + my ($self,$stable_id,$type,$link, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; $type = lc $type; - my $dbi = $self->dbi; my $sql = "select general_xref_id from ${type}_direct_xref d where ensembl_stable_id = ? and linkage_xref= ?"; my $direct_sth = $dbi->prepare($sql); @@ -842,16 +840,14 @@ sub get_direct_xref{ # if not found return undef; ################################################################### sub get_xref{ - my ($self,$acc,$source, $species_id) = @_; + my ($self,$acc,$source, $species_id, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; - my $dbi = $self->dbi; # # If the statement handle does nt exist create it. # - if(!(defined $get_xref_sth) ){ - my $sql = 'select xref_id from xref where accession = ? and source_id = ? and species_id = ?'; - $get_xref_sth = $dbi->prepare($sql); - } + my $sql = 'select xref_id from xref where accession = ? 
and source_id = ? and species_id = ?'; + my $get_xref_sth = $dbi->prepare($sql); # # Find the xref_id using the sql above @@ -868,16 +864,11 @@ sub get_xref{ # if not found return undef; ################################################################### sub get_object_xref { - my ($self, $xref_id, $ensembl_id, $object_type) = @_; + my ($self, $xref_id, $ensembl_id, $object_type, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; - my $dbi = $self->dbi; - # - # If the statement handle does nt exist create it. - # - if(!(defined $get_object_xref_sth) ){ - my $sql = 'select object_xref_id from object_xref where xref_id = ? and ensembl_object_type = ? and ensembl_id = ?'; - $get_object_xref_sth = $dbi->prepare($sql); - } + my $sql = 'select object_xref_id from object_xref where xref_id = ? and ensembl_object_type = ? and ensembl_id = ?'; + my $get_object_xref_sth = $dbi->prepare($sql); # # Find the object_xref_id using the sql above @@ -906,27 +897,22 @@ sub add_xref { my $version = $arg_ref->{version} || 0; my $info_type = $arg_ref->{info_type} || 'MISC'; my $info_text = $arg_ref->{info_text} || ''; + my $dbi = $arg_ref->{dbi}; + $dbi = $self->dbi unless defined $dbi; ################################################################## # See if it already exists. It so return the xref_id for this one. ################################################################## - my $xref_id = $self->get_xref($acc,$source_id, $species_id); + my $xref_id = $self->get_xref($acc,$source_id, $species_id, $dbi); if(defined $xref_id){ return $xref_id; } - - ####################################################################### - # If the statement handle for the insertion of xrefs does not exist yet - # then create it - ####################################################################### - if (!(defined $add_xref_sth) ) { - $add_xref_sth = - $self->dbi->prepare( 'INSERT INTO xref ' + my $add_xref_sth = + $dbi->prepare( 'INSERT INTO xref ' . 
'(accession,version,label,description,source_id,species_id, info_type, info_text) ' . 'VALUES(?,?,?,?,?,?,?,?)' ); - } ###################################################################### # If the description is more than 255 characters, chop it off and add @@ -962,26 +948,23 @@ sub add_object_xref { my $xref_id = $arg_ref->{xref_id} || croak 'add_object_xref needs an xref_id'; my $ensembl_id = $arg_ref->{ensembl_id} || croak 'add_object_xref needs a ensembl_id'; my $object_type = $arg_ref->{object_type} || croak 'add_object_xref needs an object_type'; + my $dbi = $arg_ref->{dbi}; + + $dbi = $self->dbi unless defined $dbi; ################################################################## # See if it already exists. It so return the xref_id for this one. ################################################################## - my $object_xref_id = $self->get_object_xref($xref_id, $ensembl_id, $object_type); + my $object_xref_id = $self->get_object_xref($xref_id, $ensembl_id, $object_type, $dbi); if(defined $object_xref_id){ return $object_xref_id; } - ####################################################################### - # If the statement handle for the insertion of object_xrefs does not exist yet - # then create it - ####################################################################### - if (!(defined $add_object_xref_sth) ) { - $add_object_xref_sth = - $self->dbi->prepare( 'INSERT INTO object_xref' + my $add_object_xref_sth = + $dbi->prepare( 'INSERT INTO object_xref' . '(ensembl_id, ensembl_object_type, xref_id) ' . 
'VALUES(?,?,?)' ); - } #################################### # Add the object_xref and croak if it fails @@ -1003,17 +986,14 @@ sub add_identity_xref { my $score = $arg_ref->{score} || croak 'add_identity_xref needs a score'; my $target_identity = $arg_ref->{target_identity} || croak 'add_identity_xref needs a target_identity'; my $query_identity = $arg_ref->{query_identity} || croak 'add_identity_xref needs a query_identity'; + my $dbi = $arg_ref->{dbi}; - ####################################################################### - # If the statement handle for the insertion of object_xrefs does not exist yet - # then create it - ####################################################################### - if (!(defined $add_identity_xref_sth) ) { - $add_identity_xref_sth = - $self->dbi->prepare( 'INSERT INTO identity_xref' + $dbi = $self->dbi unless defined $dbi; + + my $add_identity_xref_sth = + $dbi->prepare( 'INSERT INTO identity_xref' . '(object_xref_id, score, query_identity, target_identity) ' . 'VALUES(?,?,?,?)' ); - } #################################### # Add the object_xref and croak if it fails @@ -1041,24 +1021,20 @@ sub add_to_direct_xrefs{ my $description = $arg_ref->{desc}; my $linkage = $arg_ref->{linkage}; my $info_text = $arg_ref->{info_text} || ''; + my $dbi = $arg_ref->{dbi}; - my $dbi= $self->dbi(); + $dbi = $self->dbi unless defined $dbi; - ###################### - # Get statement handle - ###################### - if(!(defined $add_xref_sth)){ - my $sql = (<<'AXX'); + my $sql = (<<'AXX'); INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type, info_text) VALUES (?,?,?,?,?,?,?,?) 
AXX - $add_xref_sth = $dbi->prepare($sql); - } + my $add_xref_sth = $dbi->prepare($sql); ############################################################### # If the acc already has an xrefs find it else cretae a new one ############################################################### - my $direct_id = $self->get_xref($acc, $source_id, $species_id); + my $direct_id = $self->get_xref($acc, $source_id, $species_id, $dbi); if(!(defined $direct_id)){ $add_xref_sth->execute( $acc, $version || 0, $label, @@ -1066,7 +1042,7 @@ AXX ) or croak("$acc\t$label\t\t$source_id\t$species_id\n"); } - $direct_id = $self->get_xref($acc, $source_id, $species_id); + $direct_id = $self->get_xref($acc, $source_id, $species_id, $dbi); ######################### # Now add the direct info @@ -1084,33 +1060,12 @@ sub add_direct_xref { my ($self, $general_xref_id, $ensembl_stable_id, $ensembl_type, $linkage_type, $dbi) = @_; $dbi = $self->dbi unless defined $dbi; - ####################################################### - # Create statement handles if they do not exist already - ######################################################## - if (!(defined $add_direct_xref_sth{$ensembl_type})){ - my $add_gene_direct_xref_sth = $dbi->prepare('INSERT INTO gene_direct_xref VALUES(?,?,?)'); - my $add_tr_direct_xref_sth = $dbi->prepare('INSERT INTO transcript_direct_xref VALUES(?,?,?)'); - my $add_tl_direct_xref_sth = $dbi->prepare('INSERT INTO translation_direct_xref VALUES(?,?,?)'); - $add_direct_xref_sth{'gene'} = $add_gene_direct_xref_sth; - $add_direct_xref_sth{'transcript'} = $add_tr_direct_xref_sth; - $add_direct_xref_sth{'translation'} = $add_tl_direct_xref_sth; - $add_direct_xref_sth{'Gene'} = $add_gene_direct_xref_sth; - $add_direct_xref_sth{'Transcript'} = $add_tr_direct_xref_sth; - $add_direct_xref_sth{'Translation'} = $add_tl_direct_xref_sth; - } - ############################## - # Make sure type is recognised - ############################## - if(!(defined $add_direct_xref_sth{$ensembl_type})){ 
- croak "ERROR add_direct_xref_sth does not exist for $ensembl_type ???"; - } - else{ - ########################## - # Add the direct xref data - ########################## - $add_direct_xref_sth{$ensembl_type}->execute($general_xref_id, $ensembl_stable_id, $linkage_type); - } + $ensembl_type = lc($ensembl_type); + my $sql = "INSERT INTO " . $ensembl_type . "_direct_xref VALUES (?,?,?)"; + my $add_direct_xref_sth = $dbi->prepare($sql); + + $add_direct_xref_sth->execute($general_xref_id, $ensembl_stable_id, $linkage_type); return; } @@ -1130,41 +1085,35 @@ sub add_dependent_xref{ my $description = $arg_ref->{desc}; my $linkage = $arg_ref->{linkage}; my $info_text = $arg_ref->{info_text} || ''; + my $dbi = $arg_ref->{dbi}; - my $dbi = $self->dbi; + $dbi = $self->dbi unless defined $dbi; - ######################################## - # Create/Get the statement handle needed - ######################################## - if(!(defined $add_xref_sth)){ - my $sql = (<<'IXR'); + my $sql = (<<'IXR'); INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type, info_text) VALUES (?,?,?,?,?,?,?,?) IXR - $add_xref_sth = $dbi->prepare($sql); - } - if(!(defined $add_dependent_xref_sth)){ - my $sql = (<<'ADX'); + my $add_xref_sth = $dbi->prepare($sql); + $sql = (<<'ADX'); INSERT INTO dependent_xref (master_xref_id,dependent_xref_id,linkage_annotation,linkage_source_id) VALUES (?,?,?,?) ADX - $add_dependent_xref_sth = $dbi->prepare($sql); - } + my $add_dependent_xref_sth = $dbi->prepare($sql); #################################################### # Does the xref already exist. 
If so get its xref_id # else create it and get the new xref_id #################################################### - my $dependent_id = $self->get_xref($acc, $source_id, $species_id); + my $dependent_id = $self->get_xref($acc, $source_id, $species_id, $dbi); if(!(defined $dependent_id)){ $add_xref_sth->execute( $acc, $version, $label, $description, $source_id, $species_id, 'DEPENDENT', $info_text ) or croak("$acc\t$label\t\t$source_id\t$species_id\n"); } - $dependent_id = $self->get_xref($acc, $source_id, $species_id); + $dependent_id = $self->get_xref($acc, $source_id, $species_id, $dbi); ################################################ # Croak if we have failed to create.get the xref @@ -1193,15 +1142,13 @@ ADX # but want to write synonyms for each with the same accession ################################################################## sub add_to_syn_for_mult_sources{ - my ($self, $acc, $sources, $syn, $species_id) = @_; + my ($self, $acc, $sources, $syn, $species_id, $dbi) = @_; - my $dbi = $self->dbi; - if(!(defined $add_synonym_sth)){ - $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); - } + $dbi = $self->dbi unless defined $dbi; + my $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); foreach my $source_id (@{$sources}){ - my $xref_id = $self->get_xref($acc, $source_id, $species_id); + my $xref_id = $self->get_xref($acc, $source_id, $species_id, $dbi); if(defined $xref_id){ $add_synonym_sth->execute( $xref_id, $syn ) or croak( $dbi->errstr() . 
"\n $xref_id\n $syn\n" ); @@ -1215,13 +1162,11 @@ sub add_to_syn_for_mult_sources{ # Add synomyn for an xref given by accession and source_id ########################################################## sub add_to_syn{ - my ($self, $acc, $source_id, $syn, $species_id) = @_; + my ($self, $acc, $source_id, $syn, $species_id, $dbi) = @_; - my $dbi = $self->dbi; - if(!(defined $add_synonym_sth)){ - $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); - } - my $xref_id = $self->get_xref($acc, $source_id, $species_id); + $dbi = $self->dbi unless defined $dbi; + my $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); + my $xref_id = $self->get_xref($acc, $source_id, $species_id, $dbi); if(defined $xref_id){ $add_synonym_sth->execute( $xref_id, $syn ) or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" ); @@ -1238,12 +1183,10 @@ sub add_to_syn{ # Add synomyn for an xref given by xref_id ########################################## sub add_synonym{ - my ($self, $xref_id, $syn) = @_; + my ($self, $xref_id, $syn, $dbi) = @_; - my $dbi=$self->dbi; - if(!(defined $add_synonym_sth)){ - $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); - } + $dbi = $self->dbi unless defined $dbi; + my $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym VALUES(?,?)'); $add_synonym_sth->execute( $xref_id, $syn ) or croak( $dbi->errstr()."\n $xref_id\n $syn\n\n" ); @@ -1256,9 +1199,11 @@ sub add_synonym{ # as keys. 
####################################################### sub get_label_to_acc{ - my ($self, $name, $species_id, $prio_desc) = @_; + my ($self, $name, $species_id, $prio_desc, $dbi) = @_; my %hash1=(); + $dbi = $self->dbi unless defined $dbi; + my $sql =(<<"GLA"); SELECT xref.accession, xref.label FROM xref, source @@ -1271,7 +1216,7 @@ GLA if(defined $species_id){ $sql .= " and xref.species_id = $species_id"; } - my $sub_sth = $self->dbi->prepare($sql); + my $sub_sth = $dbi->prepare($sql); $sub_sth->execute(); while(my @row = $sub_sth->fetchrow_array()) { @@ -1297,7 +1242,7 @@ GLS if(defined $species_id){ $sql .= " AND xref.species_id = $species_id"; } - $sub_sth = $self->dbi->prepare($sql); + $sub_sth = $dbi->prepare($sql); $sub_sth->execute(); while(my @row = $sub_sth->fetchrow_array()) { @@ -1312,9 +1257,11 @@ GLS # and the label as the value. ####################################################### sub get_acc_to_label{ - my ($self, $name, $species_id, $prio_desc) = @_; + my ($self, $name, $species_id, $prio_desc, $dbi) = @_; my %hash1=(); + $dbi = $self->dbi unless defined $dbi; + my $sql =(<<"GLA"); SELECT xref.accession, xref.label FROM xref, source @@ -1327,7 +1274,7 @@ GLA if(defined $species_id){ $sql .= " and xref.species_id = $species_id"; } - my $sub_sth = $self->dbi->prepare($sql); + my $sub_sth = $dbi->prepare($sql); $sub_sth->execute(); while(my @row = $sub_sth->fetchrow_array()) { @@ -1344,10 +1291,10 @@ GLA # as keys. ####################################################### sub get_label_to_desc{ - my ($self, $name, $species_id, $prio_desc) = @_; + my ($self, $name, $species_id, $prio_desc, $dbi) = @_; my %hash1=(); - my $dbi = $self->dbi(); + $dbi = $self->dbi unless defined $dbi; my $sql =(<<"GDH"); SELECT xref.description, xref.label @@ -1401,10 +1348,12 @@ GDS # Set release for a particular source_id. 
######################################## sub set_release{ - my ($self, $source_id, $s_release ) = @_; + my ($self, $source_id, $s_release, $dbi ) = @_; + + $dbi = $self->dbi unless defined $dbi; my $sth = - $self->dbi->prepare('UPDATE source SET source_release=? WHERE source_id=?'); + $dbi->prepare('UPDATE source SET source_release=? WHERE source_id=?'); if($verbose) { print "Setting release to '$s_release' for source ID '$source_id'\n"; } @@ -1420,6 +1369,9 @@ sub set_release{ sub get_dependent_mappings { my $self = shift; my $source_id = shift; + my $dbi = shift; + + $dbi = $self->dbi unless defined $dbi; my $sql =(<<"GDM"); SELECT d.master_xref_id, d.dependent_xref_id @@ -1427,7 +1379,7 @@ sub get_dependent_mappings { WHERE x.xref_id = d.dependent_xref_id AND x.source_id = $source_id GDM - my $sth = $self->dbi->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute(); my $master_xref; my $dependent_xref; @@ -1447,6 +1399,8 @@ GDM sub get_ext_synonyms{ my $self = shift; my $source_name = shift; + my $dbi = shift; + $dbi = $self->dbi unless defined $dbi; my %ext_syns; my %seen; # can be in more than once fro each type of external source. my $separator = qw{:}; @@ -1458,7 +1412,7 @@ sub get_ext_synonyms{ so.source_id = x.source_id AND so.name like '$source_name' GES - my $sth = $self->dbi->prepare($sql); + my $sth = $dbi->prepare($sql); $sth->execute; my ($acc, $label, $syn); @@ -1485,6 +1439,8 @@ GES ###################################################################### sub parsing_finished_store_data { my $self = shift; + my $dbi = shift; + $dbi = $self->dbi unless defined $dbi; # Store max id for @@ -1500,7 +1456,7 @@ sub parsing_finished_store_data { ( 'xref' => 'xref_id', 'object_xref' => 'object_xref_id' ); foreach my $table ( keys %table_and_key ) { - my $sth = $self->dbi->prepare( + my $sth = $dbi->prepare( 'select MAX(' . $table_and_key{$table} . 
") from $table" ); $sth->execute; my $max_val; @@ -1508,16 +1464,17 @@ sub parsing_finished_store_data { $sth->fetch; $sth->finish; $self->add_meta_pair( 'PARSED_' . $table_and_key{$table}, - $max_val || 1 ); + $max_val || 1, $dbi ); } return; } ## end sub parsing_finished_store_data sub get_meta_value { - my ($self, $key) = @_; + my ($self, $key, $dbi) = @_; + $dbi = $self->dbi unless defined $dbi; - my $sth = $self->dbi->prepare('select meta_value from meta where meta_key like "'.$key.'" order by meta_id'); + my $sth = $dbi->prepare('select meta_value from meta where meta_key like "'.$key.'" order by meta_id'); $sth->execute(); my $value; $sth->bind_columns(\$value); diff --git a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm index 26710ddcb6..be3f0e3c62 100644 --- a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm @@ -37,6 +37,8 @@ sub run_script { my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; my $db = $ref_arg->{dba}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -106,6 +108,7 @@ SCD label => $display_label, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); $xref_count++; $seen{$display_label} = $xref_id; @@ -114,7 +117,7 @@ SCD $xref_id = $seen{$display_label}; } - $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $self->add_direct_xref($xref_id, $stable_id, "Transcript", "", $dbi); $direct_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/CoreXrefParser.pm b/misc-scripts/xref_mapping/XrefParser/CoreXrefParser.pm index 254f3d1a78..46113b19cc 100644 --- a/misc-scripts/xref_mapping/XrefParser/CoreXrefParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/CoreXrefParser.pm @@ -34,6 +34,8 @@ sub run_script 
{ my $species_id = $ref_arg->{species_id}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -62,7 +64,7 @@ sub run_script { $copy_description_from_object = $1; } - my $external_db_name = $self->get_source_name_for_source_id($source_id); + my $external_db_name = $self->get_source_name_for_source_id($source_id, $dbi); #copy object xrefs from core @@ -96,7 +98,7 @@ sub run_script { } #get the species name - my %id2name = $self->species_id2name; + my %id2name = $self->species_id2name($dbi); my $species_name = $id2name{$species_id}[0]; if (!$object_type) { @@ -173,7 +175,7 @@ sub run_script { # but we need to attach them specifcally # to RNAmmer, tRNAScan or RFAM # so get the source based on the db_name from the core db - my $external_source_id = $self->get_source_id_for_source_name($db_name); + my $external_source_id = $self->get_source_id_for_source_name($db_name, undef, $dbi); if (! 
defined $external_source_id) { warn ("can't get a source_id for external_db, $db_name!\n"); @@ -200,6 +202,7 @@ sub run_script { desc => $description, source_id => $external_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); @@ -210,7 +213,7 @@ sub run_script { $xref_id = $added_xref{$xref->primary_id()}; } - $self->add_direct_xref($xref_id, $object->stable_id(), $valid_object_types{$object_type}, ""); + $self->add_direct_xref($xref_id, $object->stable_id(), $valid_object_types{$object_type}, "", $dbi); $direct_count++; } } diff --git a/misc-scripts/xref_mapping/XrefParser/DBASSParser.pm b/misc-scripts/xref_mapping/XrefParser/DBASSParser.pm index b81211b0ec..c229ce9b05 100644 --- a/misc-scripts/xref_mapping/XrefParser/DBASSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/DBASSParser.pm @@ -44,6 +44,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -101,7 +103,7 @@ sub run { ++$parsed_count; - my $xref_id = $self->get_xref( $dbass_gene_id, $source_id, $species_id ); + my $xref_id = $self->get_xref( $dbass_gene_id, $source_id, $species_id, $dbi ); if ( !defined($xref_id) || $xref_id eq '' ) { $xref_id = $self->add_xref({ acc => $dbass_gene_id, @@ -109,14 +111,15 @@ sub run { label => $label, desc => $description, source_id => $source_id, + dbi => $dbi, species_id => $species_id, info_type => "DIRECT"} ); } - $self->add_direct_xref( $xref_id, $ensembl_id, $type, ''); + $self->add_direct_xref( $xref_id, $ensembl_id, $type, '', $dbi); if (defined ($synonym)) { - $self->add_synonym($xref_id, $synonym); + $self->add_synonym($xref_id, $synonym, $dbi); } elsif ($synonym =~ /^\s/){ print "There is white space! 
\n" if($verbose); diff --git a/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm b/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm index 9a6fef0082..5a4ba1e9b2 100644 --- a/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/EntrezGeneParser.pm @@ -32,6 +32,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -40,9 +42,9 @@ sub run { my $file = @{$files}[0]; - my $wiki_source_id = $self->get_source_id_for_source_name("WikiGene"); + my $wiki_source_id = $self->get_source_id_for_source_name("WikiGene", undef, $dbi); - my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id)}; + my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id, $dbi)}; my $eg_io = $self->get_filehandle($file); @@ -120,6 +122,7 @@ sub run { desc => $desc, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type =>"DEPENDENT"} ); $self->add_xref({ acc => $acc, @@ -127,13 +130,14 @@ sub run { desc => $desc, source_id => $wiki_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DEPENDENT" } ); #,"From EntrezGene $acc"); $xref_count++; my (@syn) = split(/\|/ ,$arr[$gene_synonyms_index]); foreach my $synonym (@syn){ if($synonym ne "-"){ - $self->add_to_syn($acc, $source_id, $synonym, $species_id); + $self->add_to_syn($acc, $source_id, $synonym, $species_id, $dbi); $syn_count++; } } diff --git a/misc-scripts/xref_mapping/XrefParser/HGNCParser.pm b/misc-scripts/xref_mapping/XrefParser/HGNCParser.pm index 814a7c578b..3ec3d6be67 100644 --- a/misc-scripts/xref_mapping/XrefParser/HGNCParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/HGNCParser.pm @@ -33,6 +33,8 @@ sub run_script { my $file = 
$ref_arg->{file}; my $verbose = $ref_arg->{verbose}; my $db = $ref_arg->{dba}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id, file as pairs"; @@ -87,14 +89,14 @@ sub run_script { } - my (%swissprot) = %{$self->get_valid_codes('Uniprot/SWISSPROT',$species_id)}; - my (%refseq) = %{$self->get_valid_codes('refseq',$species_id)}; + my (%swissprot) = %{$self->get_valid_codes('Uniprot/SWISSPROT',$species_id, $dbi)}; + my (%refseq) = %{$self->get_valid_codes('refseq',$species_id, $dbi)}; my @list; push @list, 'refseq_peptide'; push @list, 'refseq_mRNA'; - my (%entrezgene) = %{$self->get_valid_xrefs_for_dependencies('EntrezGene',@list)}; - my $source_name = $self->get_source_name_for_source_id($source_id); - my $name_to_source_id = $self->get_sources($source_name); + my (%entrezgene) = %{$self->get_valid_xrefs_for_dependencies('EntrezGene', $dbi, @list)}; + my $source_name = $self->get_source_name_for_source_id($source_id, $dbi); + my $name_to_source_id = $self->get_sources($source_name, $dbi); my %name_count; @@ -157,11 +159,13 @@ sub run_script { label => $symbol, desc => $name, source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, + dbi => $dbi, dead => $previous_symbols, alias => $synonyms} ); $name_count{$type}++; @@ -187,11 +191,13 @@ sub run_script { label => $symbol, desc => $name, source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, + dbi => $dbi, dead => $previous_symbols, alias => $synonyms} ); $name_count{$type}++; @@ -211,7 +217,8 @@ sub run_script { type => 'gene', acc => $acc, label => $symbol, - desc => $name,, + desc => $name, + dbi => $dbi, source_id => $source_id, species_id => $species_id} ); 
@@ -219,6 +226,7 @@ sub run_script { name => $acc, species_id => $species_id, dead => $previous_symbols, + dbi => $dbi, alias => $synonyms}); } @@ -236,11 +244,13 @@ sub run_script { label => $symbol, desc => $name || '', source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); } $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, + dbi => $dbi, dead => $previous_symbols, alias => $synonyms}); } @@ -260,12 +270,14 @@ sub run_script { label => $symbol, desc => $name || '', source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $name_count{$type}++; $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, dead => $previous_symbols, + dbi => $dbi, alias => $synonyms}); } } @@ -278,11 +290,13 @@ sub run_script { desc => $name, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "MISC"} ); $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, + dbi => $dbi, dead => $previous_symbols, alias => $synonyms}); $mismatch++; @@ -305,20 +319,21 @@ sub run_script { sub get_sources { my $self = shift; my $source_name = shift; + my $dbi = shift; my %name_to_source_id; my @sources = ('entrezgene_manual', 'refseq_manual', 'entrezgene_mapped', 'refseq_mapped', 'ensembl_manual', 'swissprot_manual', 'desc_only', 'ccds'); foreach my $key (@sources) { - my $source_id = $self->get_source_id_for_source_name($source_name, $key); + my $source_id = $self->get_source_id_for_source_name($source_name, $key, $dbi); if(!(defined $source_id)){ die 'Could not get source id for HGNC and '. 
$key ."\n"; } $name_to_source_id{ $key } = $source_id; } - my $source_id = $self->get_source_id_for_source_name('LRG_HGNC_notransfer'); + my $source_id = $self->get_source_id_for_source_name('LRG_HGNC_notransfer', undef, $dbi); if(!(defined $source_id) ){ die 'Could not get source id for LRG_HGNC_notransfer\n'; } @@ -335,18 +350,19 @@ sub add_synonyms_for_hgnc{ my $species_id = $ref_arg->{species_id}; my $dead_name = $ref_arg->{dead}; my $alias = $ref_arg->{alias}; + my $dbi = $ref_arg->{dbi}; if (defined $dead_name ) { # dead name, add to synonym my @array2 = split ',\s*', $dead_name ; foreach my $arr (@array2){ - $self->add_to_syn($name, $source_id, $arr, $species_id); + $self->add_to_syn($name, $source_id, $arr, $species_id, $dbi); } } if (defined $alias ) { # alias, add to synonym my @array2 = split ',\s*', $alias; foreach my $arr (@array2){ - $self->add_to_syn($name, $source_id, $arr, $species_id); + $self->add_to_syn($name, $source_id, $arr, $species_id, $dbi); } } return; diff --git a/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm index 40f7e76b8f..f6ea32982a 100644 --- a/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm @@ -37,6 +37,8 @@ sub run_script { my $species_id = $ref_arg->{species_id}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -148,12 +150,10 @@ SQL my %version; my %description; - my $dbi = $self->dbi(); - my $sql_syn = "insert ignore into synonym (xref_id, synonym) values (?, ?)"; my $add_syn_sth = $dbi->prepare($sql_syn); - my $syn_hash = $self->get_ext_synonyms("HGNC"); + my $syn_hash = $self->get_ext_synonyms("HGNC", $dbi); $sql = 'select source_id, priority_description from source 
where name like "HGNC"'; $sth = $dbi->prepare($sql); @@ -194,10 +194,11 @@ SQL desc => $description{$hgnc}, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); foreach my $stable_id (@{$ccds_to_stable_id{$ccds}}){ - $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $self->add_direct_xref($xref_id, $stable_id, "Transcript", "", $dbi); $direct_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/HPAParser.pm b/misc-scripts/xref_mapping/XrefParser/HPAParser.pm index 2d475a2ca0..7de5e6145b 100644 --- a/misc-scripts/xref_mapping/XrefParser/HPAParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/HPAParser.pm @@ -40,6 +40,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -78,7 +80,7 @@ sub run { ++$parsed_count; - my $xref_id = $self->get_xref( $antibody_id, $source_id, $species_id ); + my $xref_id = $self->get_xref( $antibody_id, $source_id, $species_id, $dbi ); if ( !defined($xref_id) || $xref_id eq '' ) { $xref_id = $self->add_xref({ acc => $antibody_id, @@ -87,11 +89,12 @@ sub run { desc => $description, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); } - $self->add_direct_xref( $xref_id, $ensembl_peptide_id, $type, ''); + $self->add_direct_xref( $xref_id, $ensembl_peptide_id, $type, '', $dbi); } ## end while ( defined( my $line... 
diff --git a/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm b/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm index f4d71ee3c9..64d3896472 100644 --- a/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm +++ b/misc-scripts/xref_mapping/XrefParser/JGI_Parser.pm @@ -40,6 +40,8 @@ sub run { my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -49,7 +51,7 @@ sub run { my $file = @{$files}[0]; - my $source_name = $self->get_source_name_for_source_id ($source_id) ; + my $source_name = $self->get_source_name_for_source_id ($source_id, $dbi) ; # the source name defines how to parse the header # different formats for different sources (all have entries in external_db.txt and populate_metadata.sql ) @@ -159,7 +161,7 @@ sub run { print scalar(@xrefs) . 
" JGI_ xrefs succesfully parsed\n" if($verbose); - $self->upload_xref_object_graphs(\@xrefs); + $self->upload_xref_object_graphs(\@xrefs, $dbi); return 0; # successful } diff --git a/misc-scripts/xref_mapping/XrefParser/MGIParser.pm b/misc-scripts/xref_mapping/XrefParser/MGIParser.pm index d80b67199b..49a13157ed 100644 --- a/misc-scripts/xref_mapping/XrefParser/MGIParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/MGIParser.pm @@ -33,6 +33,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -51,9 +53,6 @@ sub run { my %description; my %accession; - - my $dbi = $self->dbi(); - my $sql = 'select source_id, priority_description from source where name like "MGI"'; my $sth = $dbi->prepare($sql); @@ -86,7 +85,7 @@ sub run { $sql = "insert ignore into synonym (xref_id, synonym) values (?, ?)"; my $add_syn_sth = $dbi->prepare($sql); - my $syn_hash = $self->get_ext_synonyms("MGI"); + my $syn_hash = $self->get_ext_synonyms("MGI", $dbi); my $count = 0; my $syn_count = 0; @@ -101,10 +100,11 @@ sub run { label => $label{$acc}, desc => $description{$acc}, source_id => $source_id, + dbi => $dbi, species_id => $species_id, info_type => "DIRECT"} ); - $self->add_direct_xref( $xref_id, $ensid, "Gene", ''); + $self->add_direct_xref( $xref_id, $ensid, "Gene", '', $dbi); if(defined($syn_hash->{$acc})){ foreach my $syn (@{$syn_hash->{$acc}}){ $add_syn_sth->execute($xref_id, $syn); diff --git a/misc-scripts/xref_mapping/XrefParser/MGI_CCDS_Parser.pm b/misc-scripts/xref_mapping/XrefParser/MGI_CCDS_Parser.pm index ce7beb46fe..85400a547c 100644 --- a/misc-scripts/xref_mapping/XrefParser/MGI_CCDS_Parser.pm +++ b/misc-scripts/xref_mapping/XrefParser/MGI_CCDS_Parser.pm @@ -34,6 +34,8 @@ sub run_script { my 
$species_id = $ref_arg->{species_id}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -52,8 +54,6 @@ sub run_script { my %description; my %accession; - my $dbi = $self->dbi(); - my $sql = 'select source_id, priority_description from source where name like "MGI"'; my $sth = $dbi->prepare($sql); @@ -135,6 +135,7 @@ sub run_script { label => $label{$acc}, desc => $description{$acc}, source_id => $source_id, + dbi => $dbi, species_id => $species_id }); $count++; diff --git a/misc-scripts/xref_mapping/XrefParser/MGI_Desc_Parser.pm b/misc-scripts/xref_mapping/XrefParser/MGI_Desc_Parser.pm index 44822308ae..6636fab174 100644 --- a/misc-scripts/xref_mapping/XrefParser/MGI_Desc_Parser.pm +++ b/misc-scripts/xref_mapping/XrefParser/MGI_Desc_Parser.pm @@ -36,6 +36,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -84,6 +86,7 @@ sub run { desc => $desc, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "MISC"} ); if($verbose and !$desc){ print "$accession has no description\n"; @@ -93,7 +96,7 @@ sub run { if(defined($acc_to_xref{$accession})){ foreach my $syn (@synonyms) { - $self->add_synonym($acc_to_xref{$accession}, $syn); + $self->add_synonym($acc_to_xref{$accession}, $syn, $dbi); $syn_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/MIMParser.pm b/misc-scripts/xref_mapping/XrefParser/MIMParser.pm index 4f0f70b86a..82d637308f 100644 --- a/misc-scripts/xref_mapping/XrefParser/MIMParser.pm +++ 
b/misc-scripts/xref_mapping/XrefParser/MIMParser.pm @@ -34,6 +34,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $general_source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -50,9 +52,9 @@ sub run { push @sources, $general_source_id; - my $gene_source_id = $self->get_source_id_for_source_name("MIM_GENE"); + my $gene_source_id = $self->get_source_id_for_source_name("MIM_GENE", undef, $dbi); push @sources, $gene_source_id; - my $morbid_source_id = $self->get_source_id_for_source_name("MIM_MORBID"); + my $morbid_source_id = $self->get_source_id_for_source_name("MIM_MORBID", undef, $dbi); push @sources, $morbid_source_id; print "sources are:- ".join(", ",@sources)."\n" if($verbose); @@ -111,6 +113,7 @@ sub run { desc => $long_desc, source_id => $gene_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DEPENDENT"} ); } elsif((!defined $type) or ($type eq "") or ($type eq "#") or ($type eq "%")){ #phenotype only @@ -120,6 +123,7 @@ sub run { desc => $long_desc, source_id => $morbid_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DEPENDENT"} ); } elsif($type eq "+"){ # both @@ -130,6 +134,7 @@ sub run { desc => $long_desc, source_id => $gene_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DEPENDENT"} ); $self->add_xref({ acc => $number, @@ -137,6 +142,7 @@ sub run { desc => $long_desc, source_id => $morbid_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DEPENDENT"} ); } elsif($type eq "^"){ @@ -164,7 +170,7 @@ sub run { $new = $old_to_new{$new}; } if(!defined($removed{$new})){ - $self->add_to_syn_for_mult_sources($new, \@sources, $old, $species_id); + $self->add_to_syn_for_mult_sources($new, \@sources, $old, $species_id, $dbi); $syn_count++; } } diff --git 
a/misc-scripts/xref_mapping/XrefParser/Mim2GeneParser.pm b/misc-scripts/xref_mapping/XrefParser/Mim2GeneParser.pm index 13991cf807..5292cb972a 100644 --- a/misc-scripts/xref_mapping/XrefParser/Mim2GeneParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/Mim2GeneParser.pm @@ -34,6 +34,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $general_source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -51,14 +53,13 @@ sub run { my $entrez_source_id = $self->get_source_id_for_source_name( - 'EntrezGene'); + 'EntrezGene', undef, $dbi); - my (%mim_gene) = %{$self->get_valid_codes("MIM_GENE",$species_id)}; - my (%mim_morbid) = %{$self->get_valid_codes("MIM_MORBID",$species_id)}; - my (%entrez) = %{$self->get_valid_codes("EntrezGene",$species_id)}; + my (%mim_gene) = %{$self->get_valid_codes("MIM_GENE",$species_id, $dbi)}; + my (%mim_morbid) = %{$self->get_valid_codes("MIM_MORBID",$species_id, $dbi)}; + my (%entrez) = %{$self->get_valid_codes("EntrezGene",$species_id, $dbi)}; - my $dbi = $self->dbi(); - my $add_dependent_xref_sth = $self->dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id, linkage_source_id) VALUES (?,?, $entrez_source_id)"); + my $add_dependent_xref_sth = $dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id, linkage_source_id) VALUES (?,?, $entrez_source_id)"); my $missed_entrez = 0; my $missed_omim = 0; diff --git a/misc-scripts/xref_mapping/XrefParser/PomBaseParser.pm b/misc-scripts/xref_mapping/XrefParser/PomBaseParser.pm index f28d89f18c..475bfd225d 100644 --- a/misc-scripts/xref_mapping/XrefParser/PomBaseParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/PomBaseParser.pm @@ -37,6 +37,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = 
$ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -45,8 +47,8 @@ sub run { my $file = @{$files}[0]; - my $gene_source_id = $self->get_source_id_for_source_name("PomBase_GENE"); - my $transcript_source_id = $self->get_source_id_for_source_name("PomBase_TRANSCRIPT"); + my $gene_source_id = $self->get_source_id_for_source_name("PomBase_GENE", undef, $dbi); + my $transcript_source_id = $self->get_source_id_for_source_name("PomBase_TRANSCRIPT", undef, $dbi); my $pombase_io = $self->get_filehandle($file); @@ -88,18 +90,20 @@ sub run { desc => $desc, source_id => $gene_source_id, species_id => $species_id, + dbi => $dbi, info_type => $info_type} ); - $self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type); + $self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type, $dbi); } elsif ($ensembl_object_type eq 'Transcript') { my $ensembl_xref_id = $self->add_xref({ acc => $pombase_id, label => $name, desc => $desc, + dbi => $dbi, source_id => $transcript_source_id, species_id => $species_id, info_type => $info_type} ); - $self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type); + $self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type, $dbi); } $xref_count++; @@ -109,7 +113,7 @@ sub run { if ($verbose) { print STDERR "adding synonym, $synonym\n"; } - $self->add_to_syn($pombase_id, $gene_source_id, $synonym, $species_id); + $self->add_to_syn($pombase_id, $gene_source_id, $synonym, $species_id, $dbi); $syn_count++; } } diff --git a/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm b/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm index df296026d6..868ec1ad24 100644 --- a/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm +++ 
b/misc-scripts/xref_mapping/XrefParser/RFAMParser.pm @@ -33,9 +33,12 @@ sub run_script { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; - my $core_db = $ref_arg->{db}; + my $core_db = $ref_arg->{dba}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -74,8 +77,10 @@ sub run_script { my $dba; #get the species name - my %id2name = $self->species_id2name; - my $species_name = $id2name{$species_id}[0]; + my %id2name = $self->species_id2name($dbi); + if (defined $species_name) { push @{$id2name{$species_id}}, $species_name; } + if (!defined $id2name{$species_id}) { next; } + $species_name = $id2name{$species_id}[0]; if ($host) { $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new( @@ -166,11 +171,12 @@ sub run_script { desc => $description, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}}; foreach my $stable_id (@transcript_stable_ids){ - $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $self->add_direct_xref($xref_id, $stable_id, "Transcript", "", $dbi); $direct_count++; } $xref_count++; diff --git a/misc-scripts/xref_mapping/XrefParser/RGDParser.pm b/misc-scripts/xref_mapping/XrefParser/RGDParser.pm index 3a865c3e99..cfae3fea4b 100644 --- a/misc-scripts/xref_mapping/XrefParser/RGDParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RGDParser.pm @@ -35,6 +35,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak 
"Need to pass source_id, species_id and files as pairs"; @@ -42,7 +44,7 @@ sub run { $verbose |=0; my $source_sql = "select source_id from source where name = 'RGD' and priority_description = 'direct_xref'"; - my $sth = $self->dbi->prepare($source_sql); + my $sth = $dbi->prepare($source_sql); $sth->execute(); my ($direct_source_id); $sth->bind_columns(\$direct_source_id); @@ -51,9 +53,7 @@ sub run { my $file = @{$files}[0]; - my $dbi = $self->dbi(); - - my (%refseq) = %{$self->get_valid_codes("refseq",$species_id)}; + my (%refseq) = %{$self->get_valid_codes("refseq",$species_id, $dbi)}; my $rgd_io = $self->get_filehandle($file); @@ -118,6 +118,7 @@ sub run { label => $symbol, desc => $name, source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $count++; my @syns = split(/\;/,$old_name); @@ -137,9 +138,10 @@ sub run { acc => $rgd, label => $symbol, desc => $name, + dbi => $dbi, source_id => $direct_source_id, species_id => $species_id} ); - my $xref_id = $self->get_xref($rgd, $direct_source_id, $species_id); + my $xref_id = $self->get_xref($rgd, $direct_source_id, $species_id, $dbi); my @syns = split(/\;/, $old_name); foreach my $syn(@syns) { $add_syn_sth->execute($xref_id, $syn); @@ -158,6 +160,7 @@ sub run { desc => $name, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "MISC"} ); $mismatch++; } diff --git a/misc-scripts/xref_mapping/XrefParser/ReactomeParser.pm b/misc-scripts/xref_mapping/XrefParser/ReactomeParser.pm index 522622d79a..c620073ec4 100644 --- a/misc-scripts/xref_mapping/XrefParser/ReactomeParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/ReactomeParser.pm @@ -40,6 +40,9 @@ sub run { my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + my $species_name = $ref_arg->{species}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Needs to pass 
source_id, species_id and files as pairs"; @@ -64,11 +67,13 @@ sub run { croak "Could not find release using $release_file\n"; } - $self->set_release( $source_id, $release ); + $self->set_release( $source_id, $release, $dbi ); } # Create a hash of all valid names for this species - my %species2alias = $self->species_id2name(); + my %species2alias = $self->species_id2name($dbi); + if (defined $species_name) { push @{$species2alias{$species_id}}, $species_name; } + if (!defined $species2alias{$species_id}) { next; } my @aliases = @{$species2alias{$species_id}}; my %alias2species_id = map {$_, 1} @aliases; @@ -76,12 +81,11 @@ sub run { my $err_count = 0; my %reactome2ensembl; - my $dbi = $self->dbi(); - my $reactome_source_id = $self->get_source_id_for_source_name("reactome", "direct"); - my $transcript_reactome_source_id = $self->get_source_id_for_source_name("reactome_transcript"); - my $gene_reactome_source_id = $self->get_source_id_for_source_name("reactome_gene"); - my $reactome_uniprot_source_id = $self->get_source_id_for_source_name("reactome", "uniprot"); + my $reactome_source_id = $self->get_source_id_for_source_name("reactome", "direct", $dbi); + my $transcript_reactome_source_id = $self->get_source_id_for_source_name("reactome_transcript", undef, $dbi); + my $gene_reactome_source_id = $self->get_source_id_for_source_name("reactome_gene", undef, $dbi); + my $reactome_uniprot_source_id = $self->get_source_id_for_source_name("reactome", "uniprot", $dbi); if($reactome_source_id < 1 || $transcript_reactome_source_id < 1 || $gene_reactome_source_id < 1){ die "Could not find source id for reactome sources???\n"; } @@ -98,7 +102,7 @@ sub run { print "Source_id = $reactome_uniprot_source_id\n"; } - my (%uniprot) = %{$self->get_valid_codes("uniprot/",$species_id)}; + my (%uniprot) = %{$self->get_valid_codes("uniprot/",$species_id, $dbi)}; my $is_uniprot = 0; foreach my $file (@$files) { @@ -130,6 +134,7 @@ sub run { label => $reactome_id, desc => $description, 
source_id => $reactome_uniprot_source_id, + dbi => $dbi, species_id => $species_id} ); } $info_type = 'DEPENDENT'; @@ -158,9 +163,10 @@ sub run { desc => $description, info_type => $info_type, source_id => $reactome_source_id, + dbi => $dbi, species_id => $species_id} ); - $self->add_direct_xref($xref_id, $ensembl_stable_id, $type) if $type; + $self->add_direct_xref($xref_id, $ensembl_stable_id, $type, undef, $dbi) if $type; } } } diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm index a49818771e..daf0b1e557 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm @@ -32,25 +32,28 @@ sub run_script { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; my $db = $ref_arg->{dba}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; } $verbose |=0; - my $peptide_source_id = $self->get_source_id_for_source_name('RefSeq_peptide', 'otherfeatures'); - my $mrna_source_id = $self->get_source_id_for_source_name('RefSeq_mRNA', 'otherfeatures'); - my $ncrna_source_id = $self->get_source_id_for_source_name('RefSeq_ncRNA', 'otherfeatures'); + my $peptide_source_id = $self->get_source_id_for_source_name('RefSeq_peptide', 'otherfeatures', $dbi); + my $mrna_source_id = $self->get_source_id_for_source_name('RefSeq_mRNA', 'otherfeatures', $dbi); + my $ncrna_source_id = $self->get_source_id_for_source_name('RefSeq_ncRNA', 'otherfeatures', $dbi); my $pred_peptide_source_id = - $self->get_source_id_for_source_name('RefSeq_peptide_predicted', 'otherfeatures'); +
$self->get_source_id_for_source_name('RefSeq_peptide_predicted', 'otherfeatures', $dbi); my $pred_mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','otherfeatures'); + $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','otherfeatures', $dbi); my $pred_ncrna_source_id = - $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted', 'otherfeatures'); + $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted', 'otherfeatures', $dbi); if($verbose){ print "RefSeq_peptide source ID = $peptide_source_id\n"; @@ -118,8 +121,10 @@ sub run_script { my $registry = "Bio::EnsEMBL::Registry"; #get the species name - my %id2name = $self->species_id2name; - my $species_name = $id2name{$species_id}[0]; + my %id2name = $self->species_id2name($dbi); + if (defined $species_name) { push @{$id2name{$species_id}}, $species_name; } + if (!defined $id2name{$species_id}) { next; } + $species_name = $id2name{$species_id}[0]; my $core_dba; my $otherf_dba; @@ -362,8 +367,9 @@ sub run_script { desc => '', source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => 'DIRECT' }); - $self->add_direct_xref($xref_id, $best_id, "Transcript", ""); + $self->add_direct_xref($xref_id, $best_id, "Transcript", "", $dbi); # Also store refseq protein as direct xref for ensembl translation, if translation exists my $ta_of = $otherf_dba->get_TranscriptAdaptor(); @@ -383,8 +389,9 @@ sub run_script { desc => '', source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => 'DIRECT' }); - $self->add_direct_xref($tl_xref_id, $tl->stable_id(), "Translation", ""); + $self->add_direct_xref($tl_xref_id, $tl->stable_id(), "Translation", "", $dbi); } } } diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm index c4f3834c59..71d793818a 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm +++ 
b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm @@ -42,9 +42,12 @@ sub run { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files)){ croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -55,21 +58,21 @@ sub run { $peptide_source_id = - $self->get_source_id_for_source_name('RefSeq_peptide'); + $self->get_source_id_for_source_name('RefSeq_peptide', undef, $dbi); $mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA','refseq'); + $self->get_source_id_for_source_name('RefSeq_mRNA','refseq', $dbi); $ncrna_source_id = - $self->get_source_id_for_source_name('RefSeq_ncRNA'); + $self->get_source_id_for_source_name('RefSeq_ncRNA', undef, $dbi); $pred_peptide_source_id = - $self->get_source_id_for_source_name('RefSeq_peptide_predicted'); + $self->get_source_id_for_source_name('RefSeq_peptide_predicted', undef, $dbi); $pred_mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','refseq'); + $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','refseq', $dbi); $pred_ncrna_source_id = - $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted'); + $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted', undef, $dbi); - $entrez_source_id = $self->get_source_id_for_source_name('EntrezGene'); - $wiki_source_id = $self->get_source_id_for_source_name('WikiGene'); + $entrez_source_id = $self->get_source_id_for_source_name('EntrezGene', undef, $dbi); + $wiki_source_id = $self->get_source_id_for_source_name('WikiGene', undef, $dbi); if($verbose){ print "RefSeq_peptide source ID = $peptide_source_id\n"; @@ -80,17 +83,17 @@ sub run { print 
"RefSeq_ncRNA_predicted source ID = $pred_ncrna_source_id\n" ; } - (%entrez) = %{$self->get_acc_to_label("EntrezGene",$species_id)}; + (%entrez) = %{$self->get_acc_to_label("EntrezGene",$species_id, undef, $dbi)}; my @xrefs; foreach my $file (@files) { my $xrefs = - $self->create_xrefs( $file, $species_id, $verbose ); + $self->create_xrefs( $file, $species_id, $verbose, $dbi, $species_name ); if ( !defined( $xrefs ) ) { return 1; #error } - $self->upload_xref_object_graphs( $xrefs ) + $self->upload_xref_object_graphs( $xrefs, $dbi ) } if ( defined $release_file ) { @@ -107,13 +110,13 @@ sub run { print "RefSeq release: '$release'\n" if($verbose); - $self->set_release( $source_id, $release ); - $self->set_release( $peptide_source_id, $release ); - $self->set_release( $mrna_source_id, $release ); - $self->set_release( $ncrna_source_id, $release ); - $self->set_release( $pred_mrna_source_id, $release ); - $self->set_release( $pred_ncrna_source_id, $release ); - $self->set_release( $pred_peptide_source_id, $release ); + $self->set_release( $source_id, $release, $dbi ); + $self->set_release( $peptide_source_id, $release, $dbi ); + $self->set_release( $mrna_source_id, $release, $dbi ); + $self->set_release( $ncrna_source_id, $release, $dbi ); + $self->set_release( $pred_mrna_source_id, $release, $dbi ); + $self->set_release( $pred_ncrna_source_id, $release, $dbi ); + $self->set_release( $pred_peptide_source_id, $release, $dbi ); } return 0; # successful @@ -127,26 +130,28 @@ sub run { # Slightly different formats sub create_xrefs { - my ($self, $file,$species_id, $verbose ) = @_; + my ($self, $file,$species_id, $verbose, $dbi, $species_name ) = @_; # Create a hash of all valid names and taxon_ids for this species - my %species2name = $self->species_id2name(); - my %species2tax = $self->species_id2taxonomy(); + my %species2name = $self->species_id2name($dbi); + if (defined $species_name) { push @{$species2name{$species_id}}, $species_name; } + if (!defined 
$species2name{$species_id}) { next; } + my %species2tax = $self->species_id2taxonomy($dbi); + push @{$species2tax{$species_id}}, $species_id; my @names = @{$species2name{$species_id}}; my @tax_ids = @{$species2tax{$species_id}}; my %name2species_id = map{ $_=>$species_id } @names; my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; # Retrieve existing RefSeq mRNA - my (%refseq_ids) = (%{ $self->get_valid_codes("RefSeq_mRNA", $species_id) }, %{ $self->get_valid_codes("RefSeq_mRNA_predicted", $species_id) }); - my (%entrez_ids) = %{ $self->get_valid_codes("EntrezGene", $species_id) }; - my (%wiki_ids) = %{ $self->get_valid_codes("WikiGene", $species_id) }; + my (%refseq_ids) = (%{ $self->get_valid_codes("RefSeq_mRNA", $species_id, $dbi) }, %{ $self->get_valid_codes("RefSeq_mRNA_predicted", $species_id, $dbi) }); + my (%entrez_ids) = %{ $self->get_valid_codes("EntrezGene", $species_id, $dbi) }; + my (%wiki_ids) = %{ $self->get_valid_codes("WikiGene", $species_id, $dbi) }; - my %dependent_sources = $self->get_xref_sources(); + my %dependent_sources = $self->get_xref_sources($dbi); - my $dbi = $self->dbi(); - my $add_dependent_xref_sth = $self->dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id, linkage_source_id) VALUES (?,?, $entrez_source_id)"); + my $add_dependent_xref_sth = $dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id, linkage_source_id) VALUES (?,?, $entrez_source_id)"); my $refseq_io = $self->get_filehandle($file); diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm index d3a3582fb3..896005cfc7 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm @@ -37,6 +37,8 @@ sub run { my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined 
$source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -47,18 +49,18 @@ sub run { my $peptide_source_id = - $self->get_source_id_for_source_name('RefSeq_peptide'); + $self->get_source_id_for_source_name('RefSeq_peptide', undef, $dbi); my $mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA','refseq'); + $self->get_source_id_for_source_name('RefSeq_mRNA', undef, $dbi); my $ncrna_source_id = - $self->get_source_id_for_source_name('RefSeq_ncRNA'); + $self->get_source_id_for_source_name('RefSeq_ncRNA', undef, $dbi); my $pred_peptide_source_id = - $self->get_source_id_for_source_name('RefSeq_peptide_predicted'); + $self->get_source_id_for_source_name('RefSeq_peptide_predicted', undef, $dbi); my $pred_mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','refseq'); + $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','refseq', $dbi); my $pred_ncrna_source_id = - $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted'); + $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted', undef, $dbi); if($verbose){ print "RefSeq_peptide source ID = $peptide_source_id\n"; @@ -78,7 +80,7 @@ sub run { $mrna_source_id, $ncrna_source_id, $pred_mrna_source_id, $pred_ncrna_source_id, $file, - $species_id ); + $species_id, $dbi ); if ( !defined($xrefs) ) { return 1; #error @@ -88,7 +90,7 @@ sub run { push @xrefs, @{$xrefs}; } - if ( !defined( $self->upload_xref_object_graphs( \@xrefs ) ) ) { + if ( !defined( $self->upload_xref_object_graphs( \@xrefs, $dbi ) ) ) { return 1; # error } @@ -107,13 +109,13 @@ sub run { print "RefSeq release: '$release'\n"; - $self->set_release( $source_id, $release ); - $self->set_release( $peptide_source_id, $release ); - $self->set_release( $mrna_source_id, $release ); - $self->set_release( $ncrna_source_id, $release ); - $self->set_release( $pred_peptide_source_id, $release ); - $self->set_release( $pred_mrna_source_id, 
$release ); - $self->set_release( $pred_ncrna_source_id, $release ); + $self->set_release( $source_id, $release, $dbi ); + $self->set_release( $peptide_source_id, $release, $dbi ); + $self->set_release( $mrna_source_id, $release, $dbi ); + $self->set_release( $ncrna_source_id, $release, $dbi ); + $self->set_release( $pred_peptide_source_id, $release, $dbi ); + $self->set_release( $pred_mrna_source_id, $release, $dbi ); + $self->set_release( $pred_ncrna_source_id, $release, $dbi ); } return 0; # successfull @@ -130,10 +132,10 @@ sub run { sub create_xrefs { my ($self, $peptide_source_id, $pred_peptide_source_id, $mrna_source_id, $ncrna_source_id, - $pred_mrna_source_id, $pred_ncrna_source_id, $file, $species_id ) = @_; + $pred_mrna_source_id, $pred_ncrna_source_id, $file, $species_id, $dbi ) = @_; # Create a hash of all valid names for this species - my %species2name = $self->species_id2name(); + my %species2name = $self->species_id2name($dbi); my @names = @{$species2name{$species_id}}; my %name2species_id = map{ $_=>$species_id } @names; # my %name2species_id = $self->name2species_id(); diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm index 1662887bf7..bbca7c6f9c 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm @@ -35,6 +35,8 @@ sub run_script { my $species_id = $ref_arg->{species_id}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -61,9 +63,9 @@ sub run_script { } my $mrna_source_id = - $self->get_source_id_for_source_name('RefSeq_mRNA','ccds'); + $self->get_source_id_for_source_name('RefSeq_mRNA','ccds', $dbi); my $pred_mrna_source_id = - 
$self->get_source_id_for_source_name('RefSeq_mRNA_predicted','ccds'); + $self->get_source_id_for_source_name('RefSeq_mRNA_predicted','ccds', $dbi); if($verbose){ print "RefSeq_mRNA source ID = $mrna_source_id\n"; @@ -77,8 +79,6 @@ sub run_script { my %version; my %description; - my $dbi = $self->dbi(); - my $sql =(<<'RSS'); SELECT xref.accession, xref.label, xref.version, xref.description FROM xref, source @@ -221,11 +221,12 @@ CCDS desc => $description{$refseq}, source_id => $new_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); foreach my $stable_id (@{$internal_to_stable_id{$internal_id}}){ - $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $self->add_direct_xref($xref_id, $stable_id, "Transcript", "", $dbi); $direct_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm index 12d2d339a0..a68964b359 100644 --- a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm @@ -37,6 +37,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -45,9 +47,9 @@ sub run { my $file = @{$files}[0]; - my $gene_source_id = $self->get_source_id_for_source_name("SGD_GENE"); + my $gene_source_id = $self->get_source_id_for_source_name("SGD_GENE", undef, $dbi); #my $transcript_source_id = $self->get_source_id_for_source_name("SGD_TRANSCRIPT"); - my $translation_source_id = $self->get_source_id_for_source_name("SGD_TRANSLATION"); + my $translation_source_id = $self->get_source_id_for_source_name("SGD_TRANSLATION", undef, $dbi); my $sgd_io = $self->get_filehandle($file); @@ -97,16 +99,18 @@ sub run { desc => $desc, source_id => $gene_source_id, 
species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); - $self->add_direct_xref($gene_xref_id, $orf_name, "Gene", "DIRECT"); + $self->add_direct_xref($gene_xref_id, $orf_name, "Gene", "DIRECT", $dbi); my $translation_xref_id = $self->add_xref({ acc => $sgd_id, label => $locus_name, desc => $desc, source_id => $translation_source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); - $self->add_direct_xref($translation_xref_id, $orf_name, "Translation", "DIRECT"); + $self->add_direct_xref($translation_xref_id, $orf_name, "Translation", "DIRECT", $dbi); $xref_count++; @@ -114,7 +118,7 @@ sub run { if ($verbose) { # print STDERR "adding synonym, $synonym\n"; } - $self->add_to_syn($sgd_id, $gene_source_id, $synonym, $species_id); + $self->add_to_syn($sgd_id, $gene_source_id, $synonym, $species_id, $dbi); $syn_count++; } diff --git a/misc-scripts/xref_mapping/XrefParser/UCSCParser.pm b/misc-scripts/xref_mapping/XrefParser/UCSCParser.pm index df051a8623..e8f1af9a01 100644 --- a/misc-scripts/xref_mapping/XrefParser/UCSCParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/UCSCParser.pm @@ -33,6 +33,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -42,7 +44,7 @@ sub run { my $data_file = @{$files}[0]; # Get the $source_id for the "UCSC" source. 
- $source_id = $self->get_source_id_for_source_name('UCSC'); + $source_id = $self->get_source_id_for_source_name('UCSC', undef, $dbi); my $data_io = $self->get_filehandle($data_file); @@ -104,6 +106,7 @@ sub run { 'cdsStart' => $cdsStart, 'cdsEnd' => $cdsEnd, 'exonStarts' => $exonStarts, + 'dbi' => $dbi, 'exonEnds' => $exonEnds ); $self->add_xref( $source_id, $species_id, \%xref ); diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm index 0e042d70e2..916a887671 100644 --- a/misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm @@ -40,6 +40,8 @@ sub run_script { my $species_id = $ref_arg->{species_id}; my $file = $ref_arg->{file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){ croak "Need to pass source_id, species_id and file as pairs"; @@ -111,9 +113,8 @@ sub run_script { push @{$prot2ensembl{$prot}}, $ens; } } - my $dbi = $self->dbi(); - my $sw_source_id = $self->get_source_id_for_source_name("uniprot/swissprot","sequence_mapped"); + my $sw_source_id = $self->get_source_id_for_source_name("uniprot/swissprot","sequence_mapped", $dbi); if($sw_source_id < 1){ die "Could not find source id for uniprot/swissprot ???\n"; } @@ -174,6 +175,7 @@ sub run_script { desc => $description, source_id => $source_id, species_id => $species_id, + dbi => $dbi, info_type => "DIRECT"} ); @@ -193,7 +195,7 @@ sub run_script { #add the direct xref entry # - $self->add_direct_xref( $xref_id, $trans, "Translation", ''); + $self->add_direct_xref( $xref_id, $trans, "Translation", '', $dbi); # print ":".$trans; # diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm index 0086f30049..336a1cb583 100644 --- 
a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm @@ -43,9 +43,12 @@ sub run { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files)){ croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -57,15 +60,15 @@ sub run { my ( $sp_source_id, $sptr_source_id, $sp_release, $sptr_release, $sptr_non_display_source_id, $sp_direct_source_id, $sptr_direct_source_id ); $sp_source_id = - $self->get_source_id_for_source_name('Uniprot/SWISSPROT','sequence_mapped'); + $self->get_source_id_for_source_name('Uniprot/SWISSPROT','sequence_mapped', $dbi); $sptr_source_id = - $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'sequence_mapped'); + $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'sequence_mapped', $dbi); $sptr_non_display_source_id = - $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'protein_evidence_gt_2'); + $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'protein_evidence_gt_2', $dbi); - $sp_direct_source_id = $self->get_source_id_for_source_name('Uniprot/SWISSPROT', 'direct'); - $sptr_direct_source_id = $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'direct'); + $sp_direct_source_id = $self->get_source_id_for_source_name('Uniprot/SWISSPROT', 'direct', $dbi); + $sptr_direct_source_id = $self->get_source_id_for_source_name('Uniprot/SPTREMBL', 'direct', $dbi); print "SwissProt source id for $file: $sp_source_id\n" if ($verbose); print "SpTREMBL source id for $file: $sptr_source_id\n" if ($verbose); @@ -74,7 +77,7 @@ sub run { print "SpTREMBL direct source id for $file: $sptr_direct_source_id\n" if 
($verbose); $self->create_xrefs( $sp_source_id, $sptr_source_id, $sptr_non_display_source_id, $species_id, - $file, $verbose, $sp_direct_source_id, $sptr_direct_source_id ); + $file, $verbose, $sp_direct_source_id, $sptr_direct_source_id, $dbi ); if ( defined $release_file ) { # Parse Swiss-Prot and SpTrEMBL release info from @@ -92,11 +95,11 @@ sub run { $release_io->close(); # Set releases - $self->set_release( $sp_source_id, $sp_release ); - $self->set_release( $sptr_source_id, $sptr_release ); - $self->set_release( $sptr_non_display_source_id, $sptr_release ); - $self->set_release( $sp_direct_source_id, $sp_release ); - $self->set_release( $sptr_direct_source_id,$sptr_release ); + $self->set_release( $sp_source_id, $sp_release, $dbi ); + $self->set_release( $sptr_source_id, $sptr_release, $dbi ); + $self->set_release( $sptr_non_display_source_id, $sptr_release, $dbi ); + $self->set_release( $sp_direct_source_id, $sp_release, $dbi ); + $self->set_release( $sptr_direct_source_id,$sptr_release, $dbi ); } @@ -108,7 +111,7 @@ sub run { # Parse file into array of xref objects sub create_xrefs { - my ($self, $sp_source_id, $sptr_source_id, $sptr_non_display_source_id, $species_id, $file, $verbose, $sp_direct_source_id, $sptr_direct_source_id ) = @_; + my ($self, $sp_source_id, $sptr_source_id, $sptr_non_display_source_id, $species_id, $file, $verbose, $sp_direct_source_id, $sptr_direct_source_id, $dbi ) = @_; my $num_sp = 0; my $num_sptr = 0; @@ -118,16 +121,16 @@ sub create_xrefs { my $num_direct_sp = 0; my $num_direct_sptr = 0; - my %dependent_sources = $self->get_xref_sources(); + my %dependent_sources = $self->get_xref_sources($dbi); if(defined($dependent_sources{'MGI'})){ - $dependent_sources{'MGI'} = $self->get_source_id_for_source_name("MGI","uniprot"); + $dependent_sources{'MGI'} = $self->get_source_id_for_source_name("MGI","uniprot", $dbi); } my (%genemap) = - %{ $self->get_valid_codes( "mim_gene", $species_id ) }; + %{ $self->get_valid_codes( "mim_gene", 
$species_id, $dbi ) }; my (%morbidmap) = - %{ $self->get_valid_codes( "mim_morbid", $species_id ) }; + %{ $self->get_valid_codes( "mim_morbid", $species_id, $dbi ) }; my $uniprot_io = $self->get_filehandle($file); if ( !defined $uniprot_io ) { return } @@ -137,7 +140,8 @@ sub create_xrefs { local $/ = "//\n"; # Create a hash of all valid taxon_ids for this species - my %species2tax = $self->species_id2taxonomy(); + my %species2tax = $self->species_id2taxonomy($dbi); + push @{$species2tax{$species_id}}, $species_id; my @tax_ids = @{$species2tax{$species_id}}; my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; @@ -150,7 +154,6 @@ sub create_xrefs { my %mgi_label_to_desc; my %mgi_label_to_acc; - my $dbi = $self->dbi(); my $sth = $dbi->prepare("SELECT x.accession, x.label, x.description from xref x, source s where x.source_id = s.source_id and s.name like 'MGI' and s.priority_description like 'descriptions'"); $sth->execute() or croak( $dbi->errstr() ); @@ -552,14 +555,14 @@ sub create_xrefs { push @xrefs, $xref; if ($count > 1000) { - $self->upload_xref_object_graphs(\@xrefs); + $self->upload_xref_object_graphs(\@xrefs, $dbi); $count = 0; undef @xrefs; } } - $self->upload_xref_object_graphs(\@xrefs) if scalar(@xrefs) > 0; + $self->upload_xref_object_graphs(\@xrefs, $dbi) if scalar(@xrefs) > 0; $uniprot_io->close(); diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser_descriptions_only.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser_descriptions_only.pm index 850d2f580b..1d3db073a7 100644 --- a/misc-scripts/xref_mapping/XrefParser/UniProtParser_descriptions_only.pm +++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser_descriptions_only.pm @@ -48,6 +48,8 @@ sub run { my $files = $ref_arg->{files}; my $release_file = $ref_arg->{rel_file}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) or (!defined $release_file)){ 
croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -59,16 +61,16 @@ sub run { my ( $sp_source_id, $sptr_source_id, $sp_release, $sptr_release ); $sp_source_id = - $self->get_source_id_for_source_name('Uniprot/SWISSPROT',"sequence_mapped"); + $self->get_source_id_for_source_name('Uniprot/SWISSPROT',"sequence_mapped", $dbi); $sptr_source_id = - $self->get_source_id_for_source_name('Uniprot/SPTREMBL'); + $self->get_source_id_for_source_name('Uniprot/SPTREMBL', undef, $dbi); print "SwissProt source id for $file: $sp_source_id\n" if($verbose); print "SpTREMBL source id for $file: $sptr_source_id\n" if($verbose); my @xrefs = - $self->create_xrefs( $sp_source_id, $sptr_source_id, $species_id, $file, $verbose ); + $self->create_xrefs( $sp_source_id, $sptr_source_id, $species_id, $file, $verbose, $dbi ); if ( !@xrefs ) { return 1; # 1 error @@ -81,7 +83,7 @@ sub run { # } # upload - if(!defined($self->upload_xref_object_graphs(@xrefs))){ + if(!defined($self->upload_xref_object_graphs(@xrefs, $dbi))){ return 1; } @@ -90,10 +92,10 @@ sub run { # below... my $sp_pred_source_id = $self->get_source_id_for_source_name( - 'Uniprot/SWISSPROT_predicted'); + 'Uniprot/SWISSPROT_predicted', undef, $dbi); my $sptr_pred_source_id = $self->get_source_id_for_source_name( - 'Uniprot/SPTREMBL_predicted'); + 'Uniprot/SPTREMBL_predicted', undef, $dbi); # Parse Swiss-Prot and SpTrEMBL release info from # $release_file. 
@@ -110,10 +112,10 @@ sub run { $release_io->close(); # Set releases - $self->set_release( $sp_source_id, $sp_release ); - $self->set_release( $sptr_source_id, $sptr_release ); - $self->set_release( $sp_pred_source_id, $sp_release ); - $self->set_release( $sptr_pred_source_id, $sptr_release ); + $self->set_release( $sp_source_id, $sp_release, $dbi ); + $self->set_release( $sptr_source_id, $sptr_release, $dbi ); + $self->set_release( $sp_pred_source_id, $sp_release, $dbi ); + $self->set_release( $sptr_pred_source_id, $sptr_release, $dbi ); } @@ -125,7 +127,7 @@ sub run { # Parse file into array of xref objects sub create_xrefs { - my ($self, $sp_source_id, $sptr_source_id, $species_id, $file, $verbose ) = @_; + my ($self, $sp_source_id, $sptr_source_id, $species_id, $file, $verbose, $dbi ) = @_; my $num_sp = 0; my $num_sptr = 0; @@ -135,9 +137,9 @@ sub create_xrefs { # Get predicted equivalents of various sources used here my $sp_pred_source_id = - $self->get_source_id_for_source_name('Uniprot/SWISSPROT_predicted'); + $self->get_source_id_for_source_name('Uniprot/SWISSPROT_predicted', undef, $dbi); my $sptr_pred_source_id = - $self->get_source_id_for_source_name('Uniprot/SPTREMBL_predicted'); + $self->get_source_id_for_source_name('Uniprot/SPTREMBL_predicted', undef, $dbi); print "Predicted SwissProt source id for $file: $sp_pred_source_id\n" if($verbose); print "Predicted SpTREMBL source id for $file: $sptr_pred_source_id\n" if($verbose); @@ -150,7 +152,7 @@ sub create_xrefs { local $/ = "//\n"; # Create a hash of all valid taxon_ids for this species - my %species2tax = $self->species_id2taxonomy(); + my %species2tax = $self->species_id2taxonomy($dbi); my @tax_ids = @{$species2tax{$species_id}}; my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; diff --git a/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm b/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm index cd3f395622..346ad87a9b 100644 --- a/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm +++ 
b/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm @@ -32,6 +32,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id, files and rel_file as pairs"; @@ -49,9 +51,9 @@ sub run { return 1; } - my $source_name = $self->get_source_name_for_source_id($source_id); + my $source_name = $self->get_source_name_for_source_id($source_id, $dbi); # Create a hash of all valid taxon_ids for this species - my %species2tax = $self->species_id2taxonomy(); + my %species2tax = $self->species_id2taxonomy($dbi); my @tax_ids = @{$species2tax{$species_id}}; my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; @@ -80,13 +82,15 @@ sub run { type => 'gene', acc => $acc, label => $symbol, - desc => $name,, + desc => $name, + dbi => $dbi, source_id => $source_id, species_id => $species_id} ); $self->add_synonyms_for_hgnc( {source_id => $source_id, name => $acc, species_id => $species_id, + dbi => $dbi, dead => $previous_symbols, alias => $synonyms}); diff --git a/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm b/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm index bcdd99873a..647edb3c43 100644 --- a/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/XenopusJamboreeParser.pm @@ -36,6 +36,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -64,6 +66,7 @@ sub run { acc => $acc, label => $label, desc => $desc, + dbi => $dbi, source_id => $source_id, species_id => $species_id }); 
$count++; diff --git a/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm b/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm index 7fb0d7da46..465716463a 100644 --- a/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/ZFINParser.pm @@ -34,6 +34,8 @@ sub run { my $species_id = $ref_arg->{species_id}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -43,8 +45,8 @@ sub run { my $file = @{$files}[0]; my $dir = dirname($file); - my (%swiss) = %{$self->get_valid_codes("uniprot/",$species_id)}; - my (%refseq) = %{$self->get_valid_codes("refseq",$species_id)}; + my (%swiss) = %{$self->get_valid_codes("uniprot/",$species_id, $dbi)}; + my (%refseq) = %{$self->get_valid_codes("refseq",$species_id, $dbi)}; my $swissprot_io = $self->get_filehandle( catfile( $dir, 'uniprot.txt' ) ); @@ -62,9 +64,6 @@ sub run { my %description; - my $dbi = $self->dbi(); - - my $sql = "insert ignore into synonym (xref_id, synonym) values (?, ?)"; my $add_syn_sth = $dbi->prepare($sql); @@ -109,6 +108,7 @@ sub run { label => $label, desc => $description{$zfin}, source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $spcount++; } @@ -143,6 +143,7 @@ sub run { label => $label, desc => $description{$zfin}, source_id => $source_id, + dbi => $dbi, species_id => $species_id} ); $rscount++; } @@ -154,7 +155,7 @@ sub run { $refseq_io->close(); - my (%zfin) = %{$self->get_valid_codes("zfin",$species_id)}; + my (%zfin) = %{$self->get_valid_codes("zfin",$species_id, $dbi)}; my $zfin_io = $self->get_filehandle( catfile( $dir, 'aliases.txt' ) ); @@ -183,7 +184,7 @@ sub run { chomp; my ($acc, undef, undef, $syn) = split (/\t/,$_); if(defined($zfin{$acc})){ - $self->add_to_syn_for_mult_sources($acc, $sources, $syn, $species_id); + 
$self->add_to_syn_for_mult_sources($acc, $sources, $syn, $species_id, $dbi); $syncount++; } } diff --git a/misc-scripts/xref_mapping/XrefParser/miRBaseParser.pm b/misc-scripts/xref_mapping/XrefParser/miRBaseParser.pm index 8f8149ea54..28b98b5344 100644 --- a/misc-scripts/xref_mapping/XrefParser/miRBaseParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/miRBaseParser.pm @@ -32,8 +32,11 @@ sub run { my ($self, $ref_arg) = @_; my $source_id = $ref_arg->{source_id}; my $species_id = $ref_arg->{species_id}; + my $species_name = $ref_arg->{species}; my $files = $ref_arg->{files}; my $verbose = $ref_arg->{verbose}; + my $dbi = $ref_arg->{dbi}; + $dbi = $self->dbi unless defined $dbi; if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){ croak "Need to pass source_id, species_id and files as pairs"; @@ -46,12 +49,12 @@ sub run { $species_id = $self->get_species_id_for_filename($file); } - my $xrefs = $self->create_xrefs($source_id, $file, $species_id); + my $xrefs = $self->create_xrefs($source_id, $file, $species_id, $dbi, $species_name); if(!defined($xrefs)){ return 1; #error } # upload - if(!defined($self->upload_xref_object_graphs($xrefs))){ + if(!defined($self->upload_xref_object_graphs($xrefs, $dbi))){ return 1; } return 0; # successfull @@ -63,9 +66,11 @@ sub run { sub create_xrefs { - my ($self, $source_id, $file, $species_id) = @_; + my ($self, $source_id, $file, $species_id, $dbi, $species_name) = @_; - my %species2name = $self->species_id2name(); + my %species2name = $self->species_id2name($dbi); + if (defined $species_name) { push @{$species2name{$species_id}}, $species_name; } + if (!defined $species2name{$species_id}) { next; } my @names = @{$species2name{$species_id}}; my %name2species_id = map{ $_=>$species_id } @names; -- GitLab