Skip to content
Snippets Groups Projects
Commit 195ae23f authored by Michele Clamp's avatar Michele Clamp
Browse files

Basic table/query sql and a test DBI script

parent 9aa0d494
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/perl
use DBI;
use strict;
my @driver_names = DBI->available_drivers;
#print("Drivers : @driver_names\n");
# Connect to database
my $dbh = DBI->connect('DBI:mysql:pog','','');
if ($dbh) {
print("Connection successful. Hoorah!!\n");
}
# Select all genscan predictions;
my $sth = $dbh->prepare('select id,contig,start,end from homol where analysis = \'\'');
my $rv = $sth->execute;
# Loop over all predicted exons
while(my $rowhash = $sth->fetchrow_hashref) {
my $id = $rowhash->{id};
my $contig = $rowhash->{contig};
my $start = $rowhash->{start};
my $end = $rowhash->{end};
# Prepare the homol overlap query
my $sth2 = $dbh->prepare("
select p1.id,p1.analysis,p1.start,p1.end,count(*)
from homol as p1, homol as p2
where p2.id = \'" . $id . "\' AND
p1.contig = p2.contig AND
p1.analysis != \'\' AND
p2.analysis = \'\' AND
p2.strand = p1.strand AND
p1.start >= p2.start AND
p1.end <= p2.end
group by p1.id;");
# And execute
my $rv2= $sth2->execute;
# Only report if we have overlaps
if ($sth2->rows > 0) {
print("\nHomols overlapping with predicted exon $id from $contig ($start - $end)\n\n");
printf("%20s %10s %15s %5s %5s\n","Hit id","No of hits","Type","Start","End");
printf("%20s %10s %15s %5s %5s\n","------","----------","----","-----","---");
while(my $rowhash2 = $sth2->fetchrow_hashref) {
my $hit = $rowhash2->{id};
my $count = $rowhash2->{'count(*)'};
my $type = $rowhash2->{'analysis'};
my $start2 = $rowhash2->{'start'};
my $end2 = $rowhash2->{'end'};
printf("%20s %10d %15s %5d %5d ",$hit,$count,$type,$start2,$end2);
# Now find any extra contig overlaps for that hit
my $sth3 = $dbh->prepare("
select homol.contig
from homol
where homol.id = \'" . $hit . "\' AND
homol.contig != \'" . $contig . "\'
group by homol.contig;");
my $rv3 = $sth3->execute;
if ($sth3->rows > 0) {
my @contigs = $sth3->fetchrow_array;
print("@contigs\n");
} else {
print("\n");
}
}
}
}
# Nice and tidy
$dbh->disconnect;
# Select all genes
select *
from gene;
# Select a specific transcript
select *
from transcript
where id = 'dJ402G11.05009.GENSCAN.0.CDS.1';
# Select all + strand exons (id and contig only)
select id,contig
from exon
where strand = '+';
# Select all exons in gene dJ40E16.02584.GENSCAN.0
select exon.id
from exon,transcript,exon_bridge
where exon_bridge.exon = exon.id AND
exon_bridge.transcript = transcript.id AND
transcript.geneid = 'dJ40E16.02584.GENSCAN.0';
# how many genscan predictions are there?
select homolset,count(*)
from homol
where homolset != ''
group by homolset;
# how many times does a blastp match hit a contig
select id,count(*)
from homol
where analysis = 'blastp_swir'
group by id;
# which database hits hit different contigs - this prints ALL links
# this should have a group by contig here somewhere
select p1.contig,p2.contig,p2.id
from homol as p1, homol as p2
where p1.contig != p2.contig AND
p1.id = p2.id;
# this might be better
# just prints out different contigs
select count(*),homol.id,homol.contig
from homol
where id = 'WP:F15G9.4A'
group by homol.contig;
# groups by contigs for blastp hits for all homols
select count(*),homol.id,homol.contig
from homol
where analysis = 'blastp_swir'
group by homol.contig;
# similar to above but counting the number of contig-contig links
select p1.id,count(*)
from homol as p1, homol as p2
where p1.contig != p2.contig AND
p1.id = p2.id
group by p1.id;
# show all hits between different contigs for id XL28SR
# n.b. brings up hits twice - don't know how to get around this.
select p1.*
from homol as p1, homol as p2
where p1.contig != p2.contig AND
p1.id = p2.id AND
p1.id = 'XL28SR';
# count number of exons for gene dJ437I16.00294.GENSCAN.5
select transcript.geneid,count(*)
from exon_bridge,transcript
where transcript.geneid = 'dJ437I16.00294.GENSCAN.5' AND
exon_bridge.transcript = transcript.id
group by transcript.geneid;
# count number of exons for all genes
select transcript.geneid,count(*)
from exon_bridge,transcript
where exon_bridge.transcript = transcript.id
group by transcript.geneid;
# find homol overlaps with exon dJ437I16.00294.GENSCAN.0.1
select homol.*
from homol,exon
where homol.analysis != '' AND
exon.id = 'dJ437I16.00294.GENSCAN.0.1' AND
homol.strand = exon.strand AND
homol.start >= exon.start AND
homol.end <= exon.end AND
homol.contig = exon.contig;
# or to just report the different homol ids and number of hits
select homol.id,count(*)
from homol,exon
where homol.analysis != '' AND
exon.id = 'dJ437I16.00294.GENSCAN.0.1' AND
homol.strand = exon.strand AND
homol.start >= exon.start AND
homol.end <= exon.end AND
homol.contig = exon.contig
group by homol.id;
# find all homol hits with all exons
# note - can't find how to order these by exon id
select exon.id,homol.id,count(*)
from homol,exon
where homol.analysis != '' AND
homol.strand = exon.strand AND
homol.start >= exon.start AND
homol.end <= exon.end AND
homol.contig = exon.contig
group by homol.id;
# find all homol hits that overlap with genscan predictions
select p1.id,p2.id,count(*)
from homol as p1, homol as p2
where p1.contig = p2.contig AND
p1.analysis != '' AND
p2.analysis = '' AND
p2.strand = p1.strand AND
p1.start >= p2.start AND
p1.end <= p2.end
group by p1.id;
use pog;
create table exon(id VARCHAR(40) not null,primary key(id), contig VARCHAR(40) not null, version VARCHAR(10) not null, created DATE not null, modified DATE not null, start INT(10) not null, end INT(10) not null, strand VARCHAR(1) not null, key id_contig (id,contig));
create table transcript(id VARCHAR(40) not null,primary key(id), geneid VARCHAR(40) not null, key id_geneid (id,geneid));
create table homolhit(id VARCHAR(40) not null,primary key(id), name VARCHAR(40) not null, start INT(10) not null, end INT(10) not null,strand VARCHAR(1) not null, type VARCHAR(40) not null);
create table homolset (homolid VARCHAR(40) not null, setid VARCHAR(40) not null, key homol_set(homolid,setid));
create table homol(id VARCHAR(40) not null, contig VARCHAR(40) not null, start INT(10) not null, end INT(10) not null, hstart INT(10) not null, hend INT(10) not null,score INT(10) not null, strand VARCHAR(1),analysis VARCHAR(40) not null, homolset VARCHAR(40), key id_contig (id,contig),key(start));
create table gene(id VARCHAR(40) not null, primary key(id), version VARCHAR(40) not null, created DATE not null, modified DATE not null);
create table contig(id VARCHAR(40) not null, clone VARCHAR(40) not null,primary key(id), map VARCHAR(40) not null, start INT(10), end INT(10));
create table mapbin(id VARCHAR(40) not null,primary key(id), chromosome VARCHAR(2) not null);
create table analysis(id VARCHAR(40) not null, primary key(id),db VARCHAR(40),db_version VARCHAR(5), program VARCHAR(40) not null, program_version VARCHAR(5), method VARCHAR(40) not null);
create table exon_bridge(exon VARCHAR(40) not null, transcript VARCHAR(40) not null, rank INT(10) not null, key exon_transcript (exon,transcript));
create table homol_bridge(homol VARCHAR(40) not null, homolhit VARCHAR(40) not null, rank INT(10) not null);
create table test(id int not null auto_increment, primary key(id), text VARCHAR(20));
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment