Skip to content
Snippets Groups Projects
Commit ff057e9a authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Rewrite with bugfixes.

parent d146576e
No related branches found
No related tags found
No related merge requests found
#/bin/ksh -x
#!/bin/ksh -ex
# $Id$
#
......@@ -8,119 +8,168 @@
# Author: Andreas Kahari <andreas.kahari@ebi.ac.uk>
#
# Gets a database from the FTP site, puts it in ./databases/
function getdb
# Global settings read by the functions below.

# Run every command in the C locale.
export LANG=C

# FTP server the databases are fetched from.
ftpsite='ftp.ensembl.org'

# Password given for the anonymous FTP login, in user@host.domain form.
# This must be double-quoted: the value is later substituted into a
# here-document, where it is not expanded a second time, so single
# quotes would send the literal text '${LOGNAME}@$(hostname)...'.
# The "|| true" guards are deliberate: a failing command substitution
# in a plain assignment would otherwise abort the script under -e.
ftppass="${LOGNAME}@$(hostname || true).$(domainname || true)"

# Directory holding the downloaded databases.
dbdir='./databases'

# Directory under which the per-version delta output is written.
deltadir='./deltas'

# Scripts that build the deltas and apply them again.
build_cmd='./build.pl'
apply_cmd='./apply.pl'

# Command prefixed to the build/apply runs.
time_cmd='/usr/bin/time'

# Program handed to the build/apply scripts through their -c option.
xdelta_cmd='./xdelta.osf'

# Interpreter command; used unquoted so that it splits into the
# executable and its -w flag.
perl_cmd='/usr/local/ensembl/bin/perl -w'

# Signals on which partially written output is removed before exiting.
trapsigs="INT HUP TERM"
#-------------------------------------------------------------
# Function: file_list
# Usage: file_list
#
# Downloads the ls-lR.Z file off the FTP site into the current
# working directory. Extracts the names of the files that we are
# interested in from it and outputs them on standard output. The
# format of the output is "path dbname version", where "path" is
# the path of the FTP directory, "dbname" is the name of the
# database, and "version" is the version of the database. The
# output is sorted on "dbname", then on "version".
function file_list
{
typeset path=$1
typeset db=$2
typeset ver=$3
typeset dbver=${db}_${ver}
if [[ ! -d databases/${dbver} ]]; then
trap "rm -rf databases/${dbver}; exit 1" INT
mkdir -p databases/${dbver}
( cd databases/${dbver}
ftp -i -n -v ftp.ensembl.org <<EOT
user anonymous $(whoami)@$(hostname).$(domainname)
cd ${path}/${dbver}
bin
mget *
bye
EOT
)
trap - INT
fi
ftp -i -n -v <<-EOT >/dev/null
open ${ftpsite}
user anonymous ${ftppass}
binary
get ls-lR.Z
EOT
gunzip -c ls-lR.Z |
grep 'data/mysql/.*[0-9][0-9]*_[0-9][0-9]*' |
sed -n 's/^\(.*\)\/\([^\/]*\)_\([0-9][0-9]*_[0-9][0-9]*.*\):$/\1 \2 \3/p' |
sort -k2,2 -k3,3
}
# Builds delta files, and applies them for verification
function do_delta
{
typeset db=$1
typeset v1=$2
typeset v2=$3
#-------------------------------------------------------------
typeset path1=$4
typeset path2=$5
# Function: cleanup
# Usage: cleanup dbname [version]
#
# Remove a downloaded database from ${dbdir}. If "version" is
# omitted, remove all versions of the database.
typeset vdir=deltas/to_${v2}
vdir=${vdir%_*[0-9]}
function cleanup
{
typeset dbname=$1
typeset version=${2:-'*'}
if [[ ! -d ${vdir} ]]; then
mkdir -p ${vdir}
if [[ ! -d ${dbdir} ]]; then
return
fi
typeset build_out=${vdir}/${db}_${v1}_delta_${v2}_build.out
typeset apply_out=${vdir}/${db}_${v1}_delta_${v2}_apply.out
rm -f -r ${dbdir}/${dbname}_${version}
}
if [[ ! -f $build_out ]]; then
getdb $path1 $db $v1
getdb $path2 $db $v2
#-------------------------------------------------------------
trap "rm $build_out; exit 1" INT
# Function: fetch_db
# Usage: fetch_db path dbname version
#
# Fetches version "version" of the database "dbname" at the
# path "path" off the ${ftpsite}. The database will be stored
# in ${dbdir}.
/usr/bin/time perl -w ./build.pl -c ./xdelta.osf \
-s databases -d ${vdir} \
$db $v1 $v2 2>&1 | tee $build_out
trap - INT
fi
if [[ ! -f $apply_out ]]; then
trap "rm $apply_out; exit 1" INT
/usr/bin/time perl -w ./apply.pl -c ./xdelta.osf \
-d databases -s ${vdir} \
$db $v1 $v2 2>&1 | tee $apply_out
trap - INT
# Download one version of a database into ${dbdir}.
#
# Arguments: $1 - path of the directory on ${ftpsite}
#            $2 - database name
#            $3 - database version
# Globals:   dbdir, ftpsite, ftppass, trapsigs (all read)
# Returns:   0 if the database directory already existed or the
#            transfer command succeeded; otherwise the failing status,
#            after removing the partially filled directory.
#
# Only failures that show up in the exit status of the subshell
# (cd or ftp) can be detected here.
function fetch_db
{
  typeset path=$1
  typeset dbname=$2
  typeset version=$3
  typeset target=${dbdir}/${dbname}_${version}
  typeset rc=0

  # An existing directory is taken to be a finished download.
  if [[ -d ${target} ]]; then
    return 0
  fi

  mkdir -p "${target}" || return 1

  # The trap text is expanded right now, so the target path is baked
  # into it.  ${trapsigs} is left unquoted on purpose: it is a list.
  trap "rm -rf '${target}'; exit 1" ${trapsigs}

  # Run the transfer from inside ${dbdir} so that mget writes into the
  # directory created above.  The "||" keeps -e from aborting the
  # script before the clean-up below has run.
  (
    cd "${dbdir}" || exit 1
    ftp -i -n -v <<-EOT
open ${ftpsite}
user anonymous ${ftppass}
binary
cd ${path}
mget ${dbname}_${version}/*
EOT
  ) || rc=$?

  trap - ${trapsigs}

  # A directory left behind by a failed transfer would make every
  # later run skip this database, so remove it again.
  if [[ ${rc} -ne 0 ]]; then
    rm -rf "${target}"
    return ${rc}
  fi

  return 0
}
# Removes a database that was fetched
function cleandb
#-------------------------------------------------------------
# Function: build_delta
# Usage: build_delta dbname opath oversion path version
#
# Records the changes between version "oversion" and "version"
# of database "dbname". Also tests the generated delta files.
function build_delta
{
typeset db=$1
typeset ver=$2
typeset dbname=$1
typeset opath=$2
typeset oversion=$3
typeset path=$4
typeset version=$5
typeset outdir=${deltadir}/to_${version%_*[0-9]*}
typeset bout=${outdir}/${dbname}_${oversion}_delta_${version}_build.out
typeset aout=${outdir}/${dbname}_${oversion}_delta_${version}_apply.out
if [[ ! -f ${bout} ]]; then
fetch_db ${opath} ${dbname} ${oversion}
fetch_db ${path} ${dbname} ${version}
trap "rm ${bout}; exit 1" ${trapsigs}
${time_cmd} ${perl_cmd} ${build_cmd} -c ${xdelta_cmd} \
-s ${dbdir} -d ${outdir} \
${dbname} ${oversion} ${version} | tee ${bout}
trap - ${trapsigs}
fi
if [[ -d databases && -n $db && -n $ver ]]; then
rm -rf databases/${db}_${ver}
rm -rf databases/${db}_${ver}.????
if [[ ! -f ${aout} ]]; then
trap "rm ${aout}; exit 1" ${trapsigs}
${time_cmd} ${perl_cmd} ${apply_cmd} -c ${xdelta_cmd} \
-d ${dbdir} -s ${outdir} \
${dbname} ${oversion} ${version} | tee ${aout}
trap - ${trapsigs}
fi
}
# --------------- Main
#-------------------------------------------------------------
# For debugging
typeset -ft getdb
typeset -ft do_delta
typeset -ft cleandb
typeset -ft fetch_db
typeset -ft build_delta
# A regular expression that should be avoided
avoid_re=''
file_list |
while read path dbname version; do
if [[ -n ${odbname} ]]; then
if [[ ${odbname} != ${dbname} ]]; then
cleanup ${odbname}
# A regular expression that should be required
require_re='.'
opath=${path}
odbname=${dbname}
oversion=${version}
version_re='[0-9][0-9]*_[0-9][0-9]*'
continue
fi
# Use ftp://ftp.ensembl.org/ls-lR.Z to figure out what files are
# available
lynx -source ftp://ftp.ensembl.org/ls-lR.Z | \
sed -n 's/^\.\(.*data\/mysql.*\)\/\(.*\)_\('"$version_re"'\):$/\1 \2 \3/p' | \
grep -v $avoid_re | grep $require_re | sort -k2 >ls-lR
while read path db ver; do
if [[ $db != $this_db ]]; then
cleandb $this_db $ver
cleandb $this_db $old_ver
this_db=$db
old_ver=$ver
old_path=$path
continue
build_delta ${dbname} ${opath} ${oversion} ${path} ${version}
cleanup ${dbname} ${oversion}
fi
do_delta $this_db $old_ver $ver $old_path $path
cleandb $this_db $old_ver
old_ver=$ver
old_path=$path
done <ls-lR
rm -rf databases/*
opath=${path}
odbname=${dbname}
oversion=${version}
done
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment