-
Laurent Gil authored
- Add a new optional @colour tag, to colour the tables (if you want to be consistent with colours used in your database schema). - Add a colour legend at the bottom of the HTML page, if colours are found. - You can disable this feature using the option '-c 0'
dc04b7bc
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
sql2html.pl 19.76 KiB
#!/bin/perl
# 1st Feb 2011
# Generate an HTML documentation page from an SQL file.
#
# It needs to have a "javascript like" documentation above each table. e.g.:
####################################################################################
#/**
#@table variation
#@desc This is the schema's generic representation of a variation.
#@colour #FF0000
#@column variation_id Primary key, internal identifier.
#@column source_id Foreign key references to the @link source table.
#@column name Name of the variation. e.g. "rs1333049".
#@column validation_status Variant discovery method and validation from dbSNP.
#@column ancestral_allele Taken from dbSNP to show ancestral allele for the variation.
#@column flipped This is set to 1 if the variant is flipped.
#@column class_so_id Class of the variation, based on the Sequence Ontology.
#@see variation_synonym
#@see flanking_sequence
#@see failed_variation
#@see variation_feature
#@see variation_group_variation
#@see allele
#@see allele_group_allele
#@see individual_genotype_multiple_bp
#*/
#
#
#create table variation (
# variation_id int(10) unsigned not null auto_increment, # PK
# source_id int(10) unsigned not null,
# name varchar(255),
# validation_status SET('cluster','freq','submitter','doublehit','hapmap','1000Genome','failed','precious'),
# ancestral_allele text,
# flipped tinyint(1) unsigned NULL DEFAULT NULL,
# class_so_id ENUM('SO:0001483','SO:1000002','SO:0000667') DEFAULT 'SO:0001059', # default to sequence_alteration
#
# primary key( variation_id ),
# unique ( name ),
# key source_idx (source_id)
#);
########################################################################################
# Tags description:
# /** and */ : begin and end of the document block
# @header : tag to create a group of tables
# @table : name of the sql table
# @desc : description of the role/content of the table, set or info tags
# @colour : tag to colour the header of the table (e.g. if the tables are coloured in the graphic SQL schema and you want to reproduce it in the HTML version)
# @column : column_name [tab(s)] Column description. Note: 1 ligne = 1 column
# @see : tables names linked to the described table
# @link : Internal link to an other table description. The format is ... @link table_name ...
# @info : tag to describe additional information about a table or a set of tables
use strict;
use POSIX;
use Getopt::Long;
###############
### Options ###
###############
my ($sql_file,$html_file,$db_team,$show_colour,$header_flag,$sort_headers,$sort_tables,$help);
usage() if (!scalar(@ARGV));
GetOptions(
'i=s' => \$sql_file,
'o=s' => \$html_file,
'd=s' => \$db_team,
'c=i' => \$show_colour,
'show_header=i' => \$header_flag,
'sort_headers=i' => \$sort_headers,
'sort_tables=i' => \$sort_tables,
'help!' => \$help
);
usage() if ($help);
if (!$sql_file) {
print "> Error! Please give a sql file using the option '-i' \n";
usage();
}
if (!$html_file) {
print "> Error! Please give an output file using the option '-o'\n";
usage();
}
$show_colour = 1 if (!defined($show_colour));
$header_flag = 1 if (!defined($header_flag));
$sort_headers = 1 if (!defined($sort_headers));
$sort_tables = 1 if (!defined($sort_tables));
##############
### Header ###
##############
my $html_header = qq{
<html>
<head>
<meta http-equiv="CONTENT-TYPE" content="text/html; charset=utf-8" />
<title>$db_team Schema Documentation</title>
<script language="Javascript" type="text/javascript">
// Function to show/hide the columns table
function show_hide (param) {
div = document.getElementById('div_'+param);
alink = document.getElementById('a_'+param);
if (div.style.display=='inline') {
div.style.display='none';
alink.innerHTML='Show';
}
else {
if (div.style.display=='none') {
div.style.display='inline';
alink.innerHTML='Hide';
}
}
}
</script>
</head>
<body>
<h1>Ensembl $db_team Schema Documentation</h1>
<h2>Introduction</h2>
<p><i>
<please, insert your introduction here>
</i></p>
<br />
};
##############
### Footer ##
##############
my $html_footer = qq{
</body>
</html>};
################
### Settings ##
################
my $default_colour = '#000000'; # Black
my %display_col = ('Show' => 'none', 'Hide' => 'inline');
my $documentation = {};
my $tables_names = {'default' => []};
my @header_names = ('default');
my @colours = ($default_colour);
my $in_doc = 0;
my $in_table = 0;
my $header = 'default';
my $table = '';
my $info = '';
my $nb_by_col = 15;
my $count_sql_col = 0;
my $tag_content = '';
my $tag = '';
my $display = 'Show';
my $parenth_count = 0;
#############
## Parser ##
#############
open SQLFILE, "< $sql_file" or die "Can't open $sql_file : $!";
while (<SQLFILE>) {
chomp $_;
next if ($_ eq '');
next if ($_ =~ /^\s*(DROP|PARTITION)/i);
# Verifications
if ($_ =~ /^\/\*\*/) { $in_doc=1; next; } # start of a table documentation
if ($_ =~ /^\s*create\s+table\s+(if\s+not\s+exists\s+)?(\w+)/i) { # start to parse the content of the table
my $sql_t_name = remove_char($2);
if ($sql_t_name eq $table) {
$in_table=1;
$parenth_count++;
}
else {
print STDERR "The documentation of the table $sql_t_name has not be found!\n";
}
next;
}
next if ($in_doc==0 and $in_table==0);
my $doc = $_;
## Parsing of the documentation ##
if ($in_doc==1) {
# Header name
if ($doc =~ /^\@header\s*(.+)$/i and $header_flag == 1) {
$header = $1;
push (@header_names,$header);
$tables_names->{$header} = [];
next;
}
# Table name
elsif ($doc =~ /^\@table\s*(\w+)/i) {
$table = $1;
push(@{$tables_names->{$header}},$table);
$documentation->{$header}{'tables'}{$table} = { 'desc' => '', 'colour' => '', 'column' => [], 'see' => [], 'info' => [] };
$tag = $tag_content = '';
}
# Description (used for both set, table and info tags)
elsif ($doc =~ /^\@(desc)\s*(.+)$/i) {
fill_documentation ($1,$2);
}
# Colour of the table header (optional)
elsif ($doc =~ /^\@(colour)\s*(.+)$/i and $show_colour) {
fill_documentation ($1,$2);
}
# Column
elsif ($doc =~ /^\@(column)\s*(.+)$/i) {
fill_documentation ($1,$2);
}
# See other tables
elsif ($doc =~ /^\@(see)\s*(.+)$/i) {
fill_documentation ($1,$2);
}
# Addtional information block
elsif ($doc =~ /^\@(info)\s*(.+)$/i) {
fill_documentation ();
$info = $2;
next;
}
# End of documentation
elsif ($doc =~ /^\*\//) { # End of the documentation block
fill_documentation (); # Add the last tag content to the documentation hash
$in_doc=0;
next;
}
elsif ($doc =~ /^\s*(.+)$/) { # If a tag content is split in several lines
$tag_content .= " $1";
}
}
## Parsing of the SQL table to fetch the columns types ##
elsif ($in_table==1) {
#END OF TABLE DEFINITION
#Can't do this easily with a simply regex as there are varying valid formats
#The end of the table definition is actually defined by 2nd enclosing bracket
#Regex counting VOODOO!
#This basically puts the regex in a list context
#before inc/dec'ing with it in a scalar context.
$parenth_count +=()= $doc =~ /\(/gi;
$parenth_count -=()= $doc =~ /\)/gi;
# warn "parenth count is $parenth_count";
if ($parenth_count == 0) { # End of the sql table definition
if (scalar @{$documentation->{$header}{'tables'}{$table}{column}} > $count_sql_col) {
#use Data::Dumper;
#warn "col count $count_sql_col";
#warn Data::Dumper::Dumper(\$documentation);
print STDERR "Description of a non existant column in the table $table!\n";
}
$in_table=0;
$count_sql_col = 0;
$table='';
$parenth_count = 0;
}
else{
# warn "Processing table";
## INDEXES ##
if ($doc =~ /^\s*(primary\skey)\s*\((.+)\)/i or $doc =~ /^\s*(unique)\s*\((.+)\)/i){ # Primary or unique
my $icol = remove_char($2);
add_column_index($1,$icol);
next;
}
elsif ($doc =~ /^\s*(unique\s)?(key)\s+(\S+)\s*\((.+)\)/i) { # Keys and indexes
my $icol = remove_char($4);
add_column_index("$1$2",$icol,$3);
next;
}
elsif ($doc =~ /^\s*(key)\s+\((.+)\)/i) { # Keys
my $icol = remove_char($2);
add_column_index("$1",$icol,'');
next;
}
## TYPES & DEFAULT VALUES ##
my $col_name = '';
my $col_type = '';
my $col_def = '';
# All the type is contained in the same line (type followed by parenthesis)
if ($doc =~ /^\W*(\w+)\W+(\w+\s?\(.*\))/ ){
$col_name = remove_char($1);
$col_type = $2;
if ($doc =~ /default\s*([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1; } # Default value
add_column_type_and_default_value($col_name,$col_type,$col_def);
}
# The type is written in several lines
elsif ($doc =~ /^\W*(\w+)\W+(enum|set)(\s?\(.*)/i){ # The content is split in several lines
$col_name= remove_char($1);
$col_type="$2$3<br />";
my $end_type = 0;
while ($end_type != 1){
my $line = <SQLFILE>;
chomp $line;
#Regex counting VOODOO again
$parenth_count +=()= $line =~ /\(/gi;
$parenth_count -=()= $line =~ /\)/gi;
if ($line =~ /\)/) { # Close parenthesis
$end_type=1;
$line =~ /^\s*(.+)\)/;
$col_type .= "$1)";
}
else { # Add the content of the line
$line =~ /^\s*(.+)/;
$col_type .= $1.'<br />';
}
if ($line =~ /default\s*([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1; } # Default value
}
add_column_type_and_default_value($col_name,$col_type,$col_def);
}
# All the type is contained in the same line (type without parenthesis)
elsif ($doc =~ /^\s*\W*(\w+)\W+(\w+)/ ){
$col_name = remove_char($1);
$col_type = $2;
if ($doc =~ /default\s*([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1;} # Default value
add_column_type_and_default_value($col_name,$col_type,$col_def);
}
}
}
}
close(SQLFILE);
############
### Core ##
############
# Sort the headers names by alphabetic order
if ($sort_headers == 1) {
@header_names = sort(@header_names);
}
# Sort the tables names by alphabetic order
if ($sort_tables == 1) {
while ( my($header_name,$tables) = each (%{$tables_names})) {
@{$tables} = sort(@{$tables});
}
}
# List of tables by header
my $html_content = display_tables_list();
my $table_count = 1;
my $col_count = 1;
foreach my $header_name (@header_names) {
my $tables = $tables_names->{$header_name};
# Header display
if ($header_flag == 1 and $header_name ne 'default') {
$html_content .= qq{<br />\n<hr />\n<h2>$header_name</h2>\n};
my $header_desc = $documentation->{$header_name}{'desc'};
$html_content .= qq{<p>$header_desc</p>} if (defined($header_desc));
}
elsif ($header_flag == 0 or scalar @header_names == 1) {
$html_content .= qq{<hr />\n};
}
# Additional information
if ($header_name eq 'default' and defined($documentation->{$header_name}{'info'})) {
$html_content .= qq{<h2>Additional information about the schema</h2>\n};
}
$html_content .= add_info($documentation->{$header_name}{'info'});
# Tables display
foreach my $t_name (@{$tables}) {
$html_content .= add_table_name($t_name,$documentation->{$header_name}{'tables'}{$t_name}{colour});
$html_content .= add_description($documentation->{$header_name}{'tables'}{$t_name}{desc});
$html_content .= add_info($documentation->{$header_name}{'tables'}{$t_name}{info});
$html_content .= add_columns($t_name,@{$documentation->{$header_name}{'tables'}{$t_name}{column}});
$html_content .= add_see(@{$documentation->{$header_name}{'tables'}{$t_name}{see}});
}
}
$html_content .= add_legend();
## HTML/output file ##
open HTML, "> $html_file" or die "Can't open $html_file : $!";
print HTML $html_header."\n";
print HTML $html_content."\n";
print HTML $html_footer."\n";
close(HTML);
###############
## Methods ##
###############
sub display_tables_list {
my $html = qq{<h3>List of the tables:</h3>\n};
foreach my $header_name (@header_names) {
my $tables = $tables_names->{$header_name};
my $count = scalar @{$tables};
next if ($count == 0);
my $nb_col = ceil($count/$nb_by_col);
my $table_count = 0;
my $col_count = 1;
if ($nb_col>3) {
while ($nb_col>3) {
$nb_by_col += 5;
$nb_col = ceil($count/$nb_by_col);
}
$nb_col = 3;
}
# Header
if ($header_flag == 1 and $header_name ne 'default') {
$html .= qq{<h2>$header_name</h2>\n};
}
$html .= qq{<table><tr><td>\n <ul>\n};
# Table
foreach my $t_name (@{$tables}) {
if ($table_count == $nb_by_col and $col_count<$nb_col and $nb_col>1){
$html .= qq{ </ul></td><td><ul>\n};
$table_count = 0;
}
$html .= add_table_name_to_list($t_name);
$table_count ++;
}
$html .= qq{ </ul>\n</td></tr></table>\n};
}
return $html;
}
# If the line starts by a @<tag>, the previous tag content is added to the documentation hash.
# This method allows to describe the content of a tag in several lines.
sub fill_documentation {
my $t1 = shift;
my $t2 = shift;
if ($tag ne '') {
# Description tag (info, table or header)
if ($tag eq 'desc') {
# Additional description
if ($info ne '') {
$tag_content = $info.'@info@'.$tag_content;
# Table: additional information
if ($table ne '') {
push(@{$documentation->{$header}{'tables'}{$table}{'info'}},$tag_content);
}
# Header: additional information
else {
if (!$documentation->{$header}{'info'}) {
$documentation->{$header}{'info'} = [];
}
push(@{$documentation->{$header}{'info'}},$tag_content);
}
$info = '';
}
# Table description
elsif ($table ne '') {
$documentation->{$header}{'tables'}{$table}{$tag} = $tag_content;
}
# Header description
else {
$documentation->{$header}{'desc'} = $tag_content;
}
}
elsif ($tag eq 'colour') {
$documentation->{$header}{'tables'}{$table}{$tag} = $tag_content;
if (! grep {$tag_content eq $_} @colours) {
push (@colours,$tag_content);
}
}
elsif ($tag eq 'column') {
$tag_content =~ /(\w+)[\s\t]+(.+)/;
my $column = { 'name' => $1,
'type' => '',
'default' => '',
'index' => '',
'desc' => $2
};
push(@{$documentation->{$header}{'tables'}{$table}{$tag}},$column);
}
else{
push(@{$documentation->{$header}{'tables'}{$table}{$tag}},$tag_content);
}
}
# New tag initialized
if ($t1) {
$tag = $t1;
$tag_content = $t2;
}
else {
$tag = $tag_content = '';
}
}
sub add_table_name_to_list {
my $t_name = shift;
my $html = qq{ <li><a href="#$t_name"><b>$t_name</b></a></li>\n};
return $html;
}
sub add_table_name {
my $t_name = shift;
my $colour = shift || '#000000';
my $c_box = '';
if ($show_colour) {
$c_box = qq{
<td style="padding:0px;width:10px;background-color:$colour"></td>
<td style="width:2px"></td>};
}
my $html = qq{\n<br />
<table style="border: 2px groove #CCCCCC;background-color:#FAFAFF">
<tr style="vertical-align:middle">$c_box
<td style="width:500px;text-align:left;height:10px"><span id="$t_name" style="font-size:11pt;font-weight:bold">$t_name</span></td>
<td style="width:100px;text-align:right"><a id="a_$t_name" style="cursor:pointer;text-decoration:underline" onclick="show_hide('$t_name')">Show</a> columns</td>
</tr>
</table>\n};
return $html;
}
sub add_description {
my $desc = shift;
return qq{<p>$desc<\p>\n};
}
sub add_info {
my $infos = shift;
my $html = '';
foreach my $inf (@{$infos}) {
my ($title,$content) = split('@info@', $inf);
$html .= qq{ <table>
<tr class="bg3"><th>$title</th></tr>
<tr class="bg1"><td>$content</td></tr>
</table>\n};
}
return $html;
}
sub add_columns {
my @cols = @_;
my $table = shift @cols;
my $display_style = $display_col{$display};
my $html = qq{\n <div id="div_$table" style="display:$display_style">\n
<table style="border:1px outset #222222">
<tr class="bg3 center"><th style="width:180px">Column</th><th style="width:150px">Type</th><th style="width:100px">Default value</th><th style="width:400px">Description</th><th style="width:150px">Index</th></tr>\n};
my $bg = 1;
foreach my $col (@cols) {
my $name = $col->{name};
my $type = $col->{type};
my $default = $col->{default};
my $desc = $col->{desc};
my $index = $col->{index};
$desc =~ /\@link\s?(\w+)/;
my $table_to_link = qq{<a href="#$1">$1</a>};
$desc =~ s/\@link\s?\w+/$table_to_link/;
#$col =~ /^\s*(\w+)[\s\t]+(.+)\t+(.+)\t(.*)/;
$html .= qq{ <tr class="bg$bg"><td><b>$name</b></td><td>$type</td><td>$default</td><td>$desc</td><td>$index</td></tr>\n};
if ($bg==1) { $bg=2; }
else { $bg=1; }
}
$html .= qq {</table>\n</div>\n};
return $html;
}
sub add_see {
my @sees = @_;
my $html = '';
if (scalar @sees) {
$html .= qq{<p><b>See also:</b></p>\n<ul>\n};
foreach my $see (@sees) {
$html .= qq{ <li><a href="#$see">$see</a></li>\n};
}
$html .= qq{</ul>\n};
}
return $html;
}
sub add_column_index {
my $idx_type = shift;
my $idx_col = shift;
my $idx_name = shift;
my $index = $idx_type;
$idx_name = remove_char($idx_name);
if (defined($idx_name)) {
$index .= ": $idx_name";
}
my @idx_cols = split(',',$idx_col); # The index can involve several columns
my %is_found = ();
foreach my $i_col (@idx_cols) {
$i_col =~ s/\s//g; # Remove white spaces
$i_col = remove_char($i_col); # Remove the ` character
# In case of index using a number characters for a column
if ($i_col =~ /(.+)\(\d+\)/) {
$i_col = $1;
}
$is_found{$i_col} = 0;
foreach my $col (@{$documentation->{$header}{tables}{$table}{column}}) {
if ($col->{name} eq $i_col) {
if ($col->{index} ne '') {
$col->{index} .= '<br />';
}
$col->{index} .= lc($index);
$is_found{$i_col} = 1;
last;
}
}
}
# Description missing
while (my ($k,$v) = each(%is_found)) {
if ($v==0) {
print STDERR "INDEX: The description of the column '$k' is missing in the table $table!\n";
}
}
}
sub add_column_type_and_default_value {
my $c_name = shift;
my $c_type = shift;
my $c_default = shift;
$count_sql_col ++;
my $is_found = 0;
foreach my $col (@{$documentation->{$header}{'tables'}{$table}{column}}) {
if ($col->{name} eq $c_name) {
$col->{type} = $c_type;
$col->{default} = $c_default if ($c_default ne '');
$is_found = 1;
last;
}
}
# Description missing
if ($is_found==0) {
print STDERR "COLUMN: The description of the column '$c_name' is missing in the table $table!\n";
}
}
sub add_legend {
my $html = '';
my $default = 'Other tables';
return $html if (scalar @colours == 1);
$html .= qq{<br />\n<hr />\n<h3>Colour legend</h3>\n<table>};
foreach my $c (@colours) {
my $desc = '';
if ($c eq $default_colour) {
$desc = $default;
}
$html .= qq{<tr><td style="width:25px;height:15px;background-color:$c"></td><td>$desc</td></tr>\n};
}
$html .= '</table>';
return $html;
}
sub remove_char {
my $text = shift;
$text =~ s/`//g;
return $text;
}
sub usage {
print qq{
Usage: perl sql2html.pl [OPTION]
Convert the SQL documentation into an HTML document.
Options:
-help Print this message
An input file must be specified. This file must be a SQL file, with the "Java-doc like documentation".
For more information, please visit the following page:
http://www.ebi.ac.uk/seqdb/confluence/display/EV/SQL+documentation
-i A SQL file name (Required)
-o An HTML output file name (Required)
-d The name of the database (e.g Core, Variation, Functional Genomics, ...)
-c A flag to display the colours associated with the tables (1) or not (0).
By default, the value is set to 1.
-show_header A flag to display headers for a group of tables (1) or not (0).
By default, the value is set to 1.
-sort_headers A flag to sort (1) or not (0) the headers by alphabetic order.
By default, the value is set to 1.
-sort_tables A flag to sort (1) or not (0) the tables by alphabetic order.
By default, the value is set to 1.
} . "\n";
exit(0);
}