The aim of this script is to produce a flat file containing text information gathered from different databases (OMIM, Interpro, Swissprot). For OMIM and SP, the link from the Ensembl database to the external one is taken from GeneDBLink. For Interpro, the information is taken from the interpro table.
#foreach my $comment ( $seq->annotation->each_Comment ) {
#push (@comments,$comment->text);
#}
#my $tot_sp = "$ke|$desc|@refs|@comments";
# Build the text stored for this accession: $ke, $desc and @refs joined
# with '|'. The elements of @refs are separated by $" (a single space by
# default) because the array is interpolated inside the string.
my $tot_sp = "$ke|$desc|@refs";

# Store the joined string under the accession. An earlier version first
# set the slot to an empty array reference when it was undefined; that was
# a dead store, because the slot was overwritten by this string on the very
# next statement, so it has been dropped.
$sp{$ac} = $tot_sp;
}
# Dump files
print STDERR "Dumping file\n";

# Fetch every distinct ensembl_id recorded in objectXref.
# The older lookup against genedblink is kept below for reference:
#my $sth2 = $ensdb->prepare ("select distinct (gene_id) from genedblink");
my $sth2 = $ensdb->prepare("select distinct (ensembl_id) from objectXref");
$sth2->execute;
while(my@row2=$sth2->fetchrow){
if ($row2[0]) {
    #print STDERR "$row2[0]\n";
    #print STDERR "Dump for MIM\n";

    # Look up the xref primary id and the gene stable id for the current
    # ensembl_id. The id is passed through bind placeholders rather than
    # being interpolated into the SQL text, so a value containing a quote
    # character cannot break the statement.
    #
    # Legacy genedblink form, kept for reference:
    #my $sth3 = $ensdb->prepare ("select external_id from genedblink where gene_id = '$row2[0]' and external_db = 'MIM'");
    #
    # NOTE(review): externalDB (alias e) appears in the FROM list but is not
    # constrained in the WHERE clause, so every match is repeated once per
    # externalDB row, and nothing restricts the result to MIM entries --
    # confirm the intended join and filter against the schema.
    my $sth3 = $ensdb->prepare(
        "select x.dbprimary_id, gs.stable_id from gene_stable_id gs, Xref as x, externalDB as e, objectXref as o, transcript as t where gs.gene_id=t.gene_id and o.xrefId = x.xrefId and o.ensembl_id = ? and t.translation_id = ?"
    );
    $sth3->execute($row2[0], $row2[0]);

    # Every row is fetched but nothing is written for it: the original
    # comment says the OMIM text should be dumped when the gene has a link
    # to an OMIM accession, but no such output statement exists here. The
    # former $seen3 flag was set inside this loop and never read, so it has
    # been removed.
    while (my @row3 = $sth3->fetchrow) {
        # Disabled SPTREMBL/SWISS dump, kept for reference:
        #my $sth4 = $ensdb->prepare ("select external_id from genedblink where gene_id = '$row2[0]' and (external_db = 'SPTREMBL' or external_db = 'SWISS')");
        #my $sth4 = $ensdb->prepare ("select x.dbprimary_id, t.gene from Xref as x, externalDB as e, objectXref as o, transcript as t where o.xrefId = x.xrefId and o.ensembl_id = '$row2[0]' and t.translation = '$row2[0]'");
        #$sth4->execute;
        #my $seen4=0;
        #while (my @row4 = $sth4->fetchrow) {
        #$seen4 = 1;
        #Same for SP
        #if ($sp{$row4[0]}) {
        #print OUT "$row4[1]\|SPTR\|$row4[0]\|$sp{$row4[0]}\n";
        #}
        #}
    }
}
# NOTE(review): the `while` loop over the objectXref ids that precedes this
# point does not appear to be closed before it; if that is really the case,
# this Interpro query is re-run for every id -- confirm the brace balance.
print STDERR "Dump for Interpro\n";

# Select the gene stable id and the Interpro accession for every protein
# feature whose hit id (pf.hid) matches an entry in the interpro table.
my $sth5 = $ensdb->prepare("select gs.stable_id,i.interpro_ac from protein_feature as pf, gene_stable_id gs,transcript as t,interpro as i where gs.gene_id=t.gene_id and pf.translation = t.translation_id and pf.hid = i.id");
$sth5->execute;

# Starts out empty; presumably used by the fetch loop that follows to
# remember what has already been dumped -- TODO confirm against that loop.
my %saw;
while(my@row5=$sth5->fetchrow){
#If the gene has an interpro domain, dump the text describing this domain