Basic table/query sql and a test DBI script

195ae23f · Michele Clamp · 9aa0d494 · 195ae23f · 195ae23f · 195ae23f
Commit 195ae23f authored 25 years ago by Michele Clamp
--- a/scripts/testDBI
+++ b/scripts/testDBI
+#!/usr/bin/perl
+use DBI;
+
+use strict;
+
+my @driver_names = DBI->available_drivers;
+
+#print("Drivers : @driver_names\n");
+
+# Connect to database
+my $dbh = DBI->connect('DBI:mysql:pog','','');
+
+if ($dbh) {
+  print("Connection successful. Hoorah!!\n");
+}
+
+# Select all genscan predictions;
+my $sth = $dbh->prepare('select id,contig,start,end from homol where analysis = \'\'');
+my $rv = $sth->execute;
+
+# Loop over all predicted exons
+while(my $rowhash = $sth->fetchrow_hashref) {
+  my $id     = $rowhash->{id};
+  my $contig = $rowhash->{contig};
+  my $start  = $rowhash->{start};
+  my $end    = $rowhash->{end};
+  
+  # Prepare the homol overlap query
+  my $sth2 = $dbh->prepare("
+       select    p1.id,p1.analysis,p1.start,p1.end,count(*) 
+       from      homol as p1, homol as p2 
+       where     p2.id = \'" . $id . "\' AND
+                 p1.contig = p2.contig   AND
+                 p1.analysis != \'\'     AND
+        	 p2.analysis  = \'\'     AND
+                 p2.strand = p1.strand   AND 
+                 p1.start >= p2.start    AND 
+                 p1.end <= p2.end                     
+       group by  p1.id;");
+
+  # And execute
+  my $rv2=  $sth2->execute;
+
+  # Only report if we have overlaps
+  if ($sth2->rows > 0) {
+
+    print("\nHomols overlapping with predicted exon $id from $contig ($start - $end)\n\n");
+    printf("%20s %10s %15s %5s %5s\n","Hit id","No of hits","Type","Start","End");
+    printf("%20s %10s %15s %5s %5s\n","------","----------","----","-----","---");
+
+    while(my $rowhash2 = $sth2->fetchrow_hashref) {
+      my $hit    = $rowhash2->{id};
+      my $count  = $rowhash2->{'count(*)'};
+      my $type   = $rowhash2->{'analysis'};
+      my $start2 = $rowhash2->{'start'};
+      my $end2   = $rowhash2->{'end'};
+
+      printf("%20s %10d %15s %5d %5d ",$hit,$count,$type,$start2,$end2);
+
+      # Now find any extra contig overlaps for that hit
+      my $sth3 = $dbh->prepare("
+           select   homol.contig
+           from     homol
+           where    homol.id = \'" . $hit . "\' AND
+                    homol.contig != \'" . $contig . "\'
+           group by homol.contig;");
+
+      my $rv3 = $sth3->execute;
+      if ($sth3->rows > 0) {
+	my @contigs = $sth3->fetchrow_array;
+	print("@contigs\n");
+      } else {
+	print("\n");
+      }
+
+    }
+  }
+}
+
+
+# Nice and tidy
+$dbh->disconnect;
--- a/sql/queries.sql
+++ b/sql/queries.sql
+# Select all genes
+select   * 
+from     gene;
+
+
+# Select a specific transcript
+select   *
+from     transcript
+where    id =  'dJ402G11.05009.GENSCAN.0.CDS.1';
+
+# Select all + strand exons (id and contig only)
+
+select   id,contig
+from     exon
+where    strand = '+';
+
+# Select all exons in gene   dJ40E16.02584.GENSCAN.0
+select   exon.id
+from     exon,transcript,exon_bridge
+where    exon_bridge.exon = exon.id                     AND
+         exon_bridge.transcript = transcript.id         AND
+         transcript.geneid = 'dJ40E16.02584.GENSCAN.0';
+         
+# how many genscan predictions are there?
+select   homolset,count(*) 
+from     homol
+where    homolset != '' 
+group by homolset;
+
+# how many times does a blastp match hit a contig
+select    id,count(*) 
+from      homol 
+where     analysis = 'blastp_swir' 
+group by  id; 
+
+# which database hits hit different contigs - this prints ALL links
+# this should have a group by contig here somewhere
+select    p1.contig,p2.contig,p2.id 
+from      homol as p1, homol as p2 
+where     p1.contig != p2.contig AND 
+          p1.id = p2.id;
+
+# this might be better
+# just prints out different contigs
+select    count(*),homol.id,homol.contig 
+from      homol 
+where     id = 'WP:F15G9.4A' 
+group by  homol.contig;
+
+# groups by contigs for blastp hits for all homols
+select    count(*),homol.id,homol.contig 
+from      homol 
+where     analysis = 'blastp_swir'
+group by  homol.contig;
+
+# similar to above but counting the number of contig-contig links
+select    p1.id,count(*) 
+from      homol as p1, homol as p2 
+where     p1.contig != p2.contig AND 
+          p1.id = p2.id 
+group by  p1.id;
+
+# show all hits between different contigs for id XL28SR
+# n.b. brings up hits twice - don't know how to get around this.
+select    p1.* 
+from      homol as p1, homol as p2 
+where     p1.contig != p2.contig AND 
+          p1.id = p2.id          AND 
+          p1.id = 'XL28SR';
+
+# count number of exons for gene  dJ437I16.00294.GENSCAN.5
+select    transcript.geneid,count(*) 
+from      exon_bridge,transcript 
+where     transcript.geneid = 'dJ437I16.00294.GENSCAN.5' AND  
+          exon_bridge.transcript = transcript.id 
+group by  transcript.geneid;
+
+# count number of exons for all genes
+select    transcript.geneid,count(*)
+from      exon_bridge,transcript 
+where     exon_bridge.transcript = transcript.id
+group by  transcript.geneid;
+
+
+# find homol overlaps with exon dJ437I16.00294.GENSCAN.0.1
+select    homol.* 
+from      homol,exon 
+where     homol.analysis != ''                   AND
+          exon.id = 'dJ437I16.00294.GENSCAN.0.1' AND 
+          homol.strand = exon.strand             AND 
+          homol.start >= exon.start              AND 
+          homol.end   <= exon.end                AND 
+          homol.contig = exon.contig;
+
+# or to just report the different homol ids and number of hits
+select    homol.id,count(*)
+from      homol,exon 
+where     homol.analysis != ''                   AND
+          exon.id = 'dJ437I16.00294.GENSCAN.0.1' AND 
+          homol.strand = exon.strand             AND 
+          homol.start >= exon.start              AND 
+          homol.end   <= exon.end                AND 
+          homol.contig = exon.contig
+group by  homol.id;
+
+
+# find all homol hits with all exons
+# note - can't find how to order these by exon id
+select    exon.id,homol.id,count(*)
+from      homol,exon 
+where     homol.analysis != ''                   AND
+          homol.strand = exon.strand             AND 
+          homol.start >= exon.start              AND 
+          homol.end   <= exon.end                AND 
+          homol.contig = exon.contig
+group by  homol.id;
+
+
+# find all homol hits that overlap with genscan predictions
+select    p1.id,p2.id,count(*) 
+from      homol as p1, homol as p2 
+where     p1.contig = p2.contig      AND
+          p1.analysis != ''          AND
+	  p2.analysis  = ''          AND
+          p2.strand = p1.strand      AND 
+          p1.start >= p2.start       AND 
+          p1.end <= p2.end                     
+group by  p1.id;
--- a/sql/tables.sql
+++ b/sql/tables.sql
+use pog;
+
+create table exon(id VARCHAR(40) not null,primary key(id), contig VARCHAR(40) not null, version VARCHAR(10) not null, created DATE not null, modified DATE not null, start INT(10) not null, end INT(10) not null, strand VARCHAR(1) not null, key id_contig (id,contig));
+
+create table transcript(id VARCHAR(40) not null,primary key(id), geneid VARCHAR(40) not null, key id_geneid (id,geneid));
+
+create table homolhit(id VARCHAR(40) not null,primary key(id), name VARCHAR(40) not null, start INT(10) not null, end INT(10) not null,strand VARCHAR(1) not null, type VARCHAR(40) not null);
+
+create table homolset (homolid VARCHAR(40) not null, setid VARCHAR(40) not null, key homol_set(homolid,setid));
+
+create table homol(id VARCHAR(40) not null, contig VARCHAR(40) not null, start INT(10) not null, end INT(10) not null, hstart INT(10) not null, hend INT(10) not null,score INT(10) not null, strand VARCHAR(1),analysis VARCHAR(40) not null, homolset VARCHAR(40), key id_contig (id,contig),key(start));
+
+create table gene(id VARCHAR(40) not null, primary key(id), version VARCHAR(40) not null, created DATE not null, modified DATE not null);
+
+create table contig(id VARCHAR(40) not null, clone VARCHAR(40) not null,primary key(id), map VARCHAR(40) not null, start INT(10), end INT(10));
+
+create table mapbin(id VARCHAR(40) not null,primary key(id), chromosome VARCHAR(2) not null);
+
+create table analysis(id VARCHAR(40) not null, primary key(id),db VARCHAR(40),db_version VARCHAR(5), program VARCHAR(40) not null, program_version VARCHAR(5), method VARCHAR(40) not null);
+
+create table exon_bridge(exon VARCHAR(40) not null, transcript VARCHAR(40) not null, rank INT(10) not null, key exon_transcript (exon,transcript));
+
+create table homol_bridge(homol VARCHAR(40) not null, homolhit VARCHAR(40) not null, rank INT(10) not null);
+
+create table test(id int  not null auto_increment, primary key(id), text VARCHAR(20));