xref_config2sql.pl 4.54 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
#!/software/bin/perl -w

# $Id$

########################################################################
#                                                                      #
# This script will take the 'xref_config.ini' configuration            #
# file (or whatever file name given on the command line) and           #
# convert it into a SQL file that can be used in place of the old      #
# 'populate_metadata.sql' file found in the 'sql' subdirectory.        #
#                                                                      #
12 13 14 15 16 17
# The output from this script should be redirected to a file that      #
# you manually run to populate your Xref database, just as was done    #
# with 'populate_metadata.sql'.  The safest thing to do is just to     #
# overwrite 'sql/populate_metadata.sql' with the output of this        #
# script.  This will ensure that 'xref_parser.pl populates the Xref    #
# database with the correct data.                                      #
18 19 20 21 22 23 24 25 26
#                                                                      #
########################################################################

use strict;
use warnings;

use Config::IniFiles;

my $config =
27 28 29
  Config::IniFiles->new( -file =>
      ( defined $ARGV[0] && -f $ARGV[0] ? $ARGV[0] : 'xref_config.ini' )
  );
30 31 32 33 34 35 36 37

# Do the species.

print( '#' x 80, "\n" );
print("# SPECIES\n");
print("\n");

foreach my $section ( $config->GroupMembers('species') ) {
38
  my $species_name = substr( $section, 8 );
39

40 41
  my @taxonomy_ids =
    split( /\n/, $config->val( $section, 'taxonomy_id' ) );
42

43
  my $species_id = $taxonomy_ids[0];
44

45
  printf( "# Species '%s' (id = %d)\n", $species_name, $species_id );
46

47 48 49
  foreach my $taxonomy_id (@taxonomy_ids) {
    print(   "INSERT INTO species "
           . "(species_id, taxonomy_id, name, aliases)\n" );
50

51 52 53 54
    printf( "VALUES (%d, %d, '%s', '%s');\n",
            $species_id, $taxonomy_id, $species_name,
            $config->val( $section, 'aliases' ) );
  }
55

56
  print("\n");
57 58 59 60 61 62 63 64 65
}

# Do the sources.

print( '#' x 80, "\n" );
print("# SOURCES\n");
print("\n");

my $source_id = 0;
66 67
foreach my $section ( sort( $config->GroupMembers('source') ) ) {
  my $source_name = substr( $section, 7 );
68

69
  $config->newval( $section, 'id', ++$source_id );
70

71
  printf( "# Source '%s' (id = %d)\n", $source_name, $source_id );
72

73 74 75
  print(   "INSERT INTO source "
         . "(name, source_release, download, ordered, "
         . "priority, priority_description)\n" );
76

77 78 79 80 81 82
  printf( "VALUES ('%s', '1', '%s', %d, %d, '%s');\n",
          $config->val( $section, 'name' ),
          $config->val( $section, 'download' ),
          $config->val( $section, 'order' ),
          $config->val( $section, 'priority' ),
          $config->val( $section, 'prio_descr' ) );
83

84
  print("\n");
85 86 87 88 89 90 91 92
}

# Do the data files.

print( '#' x 80, "\n" );
print("# DATA FILES\n");
print("\n");

93 94 95
foreach my $species_section ( sort( $config->GroupMembers('species') ) )
{
  my $species_name = substr( $species_section, 8 );
96

97 98
  my @taxonomy_ids =
    split( /\n/, $config->val( $species_section, 'taxonomy_id' ) );
99

100
  my $species_id = $taxonomy_ids[0];
101

102 103 104 105 106
  print( '#', '-' x 79, "\n" );
  printf( "# Data for species '%s' (id = %d)\n",
          $species_name, $species_id );
  print( '#', '-' x 79, "\n" );
  print("\n");
107

108 109 110 111
  foreach my $source_name (
     sort( split( /\n/, $config->val( $species_section, 'source' ) ) ) )
  {
    my $source_section = sprintf( "source %s", $source_name );
112

113 114 115 116 117 118 119 120
    if ( !defined( $config->val( $source_section, 'id' ) ) ) {
      die(
           sprintf( "Can not find source section '[%s]'\n"
                      . "while reading species section '[%s]'\n",
                    $source_section, $species_section
           ) );
    }

121 122
    printf( "# Data from source '%s' (id = %d)\n",
            $source_name, $config->val( $source_section, 'id' ) );
123

124 125 126
    print(   "INSERT INTO source_url "
           . "(source_id, species_id, url, release_url, "
           . "file_modified_date, upload_date, parser)\n" );
127

128 129
    my @uris =
      split( /\n/, $config->val( $source_section, 'data_uri', '' ) );
130

131
    my $release_uri = $config->val( $source_section, 'release_uri' );
132

133 134 135 136 137
    if ( $release_uri !~ /\w/ ) {
      $release_uri = '\N';
    } else {
      $release_uri = "'$release_uri'";
    }
138

139 140 141 142 143 144
    printf( "VALUES (%d, %d, '%s', %s, now(), now(), '%s');\n",
            $config->val( $source_section, 'id' ),
            $species_id,
            join( ' ', @uris ),
            $release_uri,
            $config->val( $source_section, 'parser' ) );
145

146 147
    print("\n");
  } ## end foreach my $source_name ( sort...
148
} ## end foreach my $species_section...