Skip to content
Snippets Groups Projects
Commit fe55cc90 authored by Arne Stabenau's avatar Arne Stabenau
Browse files

Old script for 120 130 schema convert split into object and script part

hopefully useful for coming schema conversions.
parent 247a59b6
No related branches found
No related tags found
No related merge requests found
# Module to ease mysql schema conversion
# Author: Arne Stabenau
# Usage:
# Make a schema_converter with new ( source_dbh, target_dbh )
# source database should be filled, target is empty schema
# For each target table there will be a transfer
# Either with a self specified transfer function
# with a custom select
# from a renamed source table
# from the same name source table
# configure the transfer with
# table_rename( "oldname", "newname" )
# table_skip( "tablename" )
# Each standard table transfer
# Either do a custom select statement
# or transfer columns with same name or renamed into each other
# specify columns to omit in target or get error
# column_rename( "tablename", "oldcolname", "newcolname" )
# column_skip( "tablename", "newcolname" )
# custom_select( "tablename", "select statement" )
# Each row may be modified (custom select or standard select)
# specify a row_modifier function for the target table
# It takes a list ref and returns a list ref with the modified values
# ( you have to know the order in which the columns come in and have to go out to the db )
# set_row_modifier( "tablename", function_reference )
package SchemaConverter;
use strict;
use DBI;
use Data::Dumper;
# Constructor.
#   $class - class name the new object is blessed into
#   first argument after the class  - source database handle
#   second argument after the class - target database handle
# Stores both handles through the accessors and then calls read_dbs()
# before handing back the new object.
sub new {
    my $class = shift;
    my ( $source_handle, $target_handle ) = @_;

    my $self = bless {}, $class;

    $self->source_dbh($source_handle);
    $self->target_dbh($target_handle);
    $self->read_dbs();

    return $self;
}
# Get/set accessor for the directory the dump files are written to.
#   $arg - optional new value; only stored when it is defined
# Returns the currently stored value (undef if never set).
sub tmp_dir {
    my $self = shift;
    my ($arg) = @_;

    if ( defined $arg ) {
        $self->{'tmp_dir'} = $arg;
    }

    return $self->{'tmp_dir'};
}
# Get/set accessor for the source database handle.
#   $arg - optional new handle; only stored when it is defined
# Returns the currently stored handle (undef if never set).
sub source_dbh {
    my $self = shift;
    my ($arg) = @_;

    if ( defined $arg ) {
        $self->{'source_dbh'} = $arg;
    }

    return $self->{'source_dbh'};
}
# Get/set accessor for the target database handle.
#   $arg - optional new handle; only stored when it is defined
# Returns the currently stored handle (undef if never set).
sub target_dbh {
    my $self = shift;
    my ($arg) = @_;

    if ( defined $arg ) {
        $self->{'target_dbh'} = $arg;
    }

    return $self->{'target_dbh'};
}
# Disconnect both database handles, source first, then target.
sub close_dbh {
    my ($self) = @_;

    my $source_handle = $self->source_dbh();
    $source_handle->disconnect();

    my $target_handle = $self->target_dbh();
    $target_handle->disconnect();
}
# Run the transfer for every table known in the target schema.
#
# For each target table a dump file "<tmp_dir>/<table>.txt" is written,
# loaded into the target table with "load data infile" and removed again.
# The dump is produced by the first of these that applies:
#   - a custom transfer function stored under {transfer}; it is called with
#     ( source_dbh, target_dbh, tablename, filehandle )
#   - standard_table_transfer() from the linked (renamed) source table;
#     an empty link means the table is skipped and nothing is loaded
#   - standard_table_transfer() with the custom select of the table
#   - standard_table_transfer() from the source table of the same name
#
# Dies when no tmp_dir is set (both handles are disconnected first), when
# the dump file cannot be opened or closed, or when no source can be found
# for a target table.
sub transfer {
    my $self = shift;

    my $tmpdir = $self->tmp_dir();
    if ( !defined $tmpdir ) {
        $self->close_dbh();
        die("No tmp_dir specified");
    }

    for my $tablename ( keys %{ $self->{targetdb}{tables} } ) {
        my $table_conf = $self->{targetdb}{tables}{$tablename};
        my $dumpfile   = "$tmpdir/$tablename.txt";
        my $skip       = 0;

        print STDERR "Transfer $tablename ";

        # Lexical handle and 3-arg open: the table name can never be
        # interpreted as part of the open mode.
        open( my $fh, '>', $dumpfile )
            or die "cant open dumpfile $dumpfile: $!";

        if ( exists $table_conf->{transfer} ) {
            my $transfunc = $table_conf->{transfer};
            $transfunc->( $self->source_dbh(), $self->target_dbh(), $tablename, $fh );
        }
        else {
            my $sourcetable;
            if ( exists $table_conf->{link} ) {
                $sourcetable = $table_conf->{link};

                # an empty link is the marker set by table_skip()
                $skip = 1 if $sourcetable eq "";
            }
            elsif ( exists $table_conf->{select} ) {
                # if we have custom select, sourcetable doesnt make sense
                $sourcetable = undef;
            }
            elsif ( !exists $self->{sourcedb}{tables}{$tablename} ) {
                die "Couldnt find source for $tablename. Enter empty sourcetable.";
            }
            else {
                $sourcetable = $tablename;
            }

            if ( !$skip ) {
                $self->standard_table_transfer( $sourcetable, $tablename, $fh );
            }
        }

        # buffered write errors only show up when the handle is closed
        close($fh) or die "cant close dumpfile $dumpfile: $!";

        if ( !$skip ) {
            $self->target_dbh->do("load data infile '$dumpfile' into table $tablename");
        }

        unlink $dumpfile;
        print STDERR " finished\n";
    }

    return;
}
# Dump the rows for one target table into an open filehandle, one line per
# row with the values separated by tabs.
#   $sourcetable - table to select from (not used when the target table has
#                  a custom select)
#   $targettable - target table whose configuration is used
#   $tmpfile     - filehandle the rows are printed to
#
# Without a custom select, one expression is selected per target column:
# the renamed source column, NULL for a column marked as skipped (empty
# rename), or the source column with the same name. Dies if a target column
# can be filled by none of these.
#
# If a row modifier is set for the target table, every fetched row (array
# ref) is passed through it and the returned array ref is written instead.
#
# NOTE(review): undef (NULL) values are written as empty strings and tabs or
# newlines inside values are not escaped - confirm this is what the
# "load data infile" step expects.
sub standard_table_transfer {
    my ( $self, $sourcetable, $targettable, $tmpfile ) = @_;
    my $sourcedb = $self->source_dbh();

    # look for custom select
    my $select;
    if ( exists $self->{targetdb}{tables}{$targettable}{select} ) {
        $select = $self->{targetdb}{tables}{$targettable}{select};
    }
    else {
        my @newcols = @{ $self->{targetdb}{tables}{$targettable}{columns} };
        my @oldcols = @{ $self->{sourcedb}{tables}{$sourcetable}{columns} };
        my %is_source_col = map { $_ => 1 } @oldcols;

        my %rename;
        if ( exists $self->{targetdb}{tables}{$targettable}{columnrename} ) {
            %rename = %{ $self->{targetdb}{tables}{$targettable}{columnrename} };
        }

        # find the source expression for every target column
        my @selnames;
        for my $colname (@newcols) {
            my $selname;
            if ( exists $rename{$colname} ) {
                $selname = $rename{$colname};

                # an empty rename is the marker set by column_skip()
                $selname = "NULL" if $selname eq "";
            }
            elsif ( $is_source_col{$colname} ) {
                $selname = $colname;
            }
            else {
                die "Couldnt fill $targettable.$colname\n";
            }
            push @selnames, $selname;
        }
        $select = "SELECT " . join( ", ", @selnames ) . " from $sourcetable";
    }

    my $sth = $sourcedb->prepare($select);
    $sth->execute();

    my $rowmod;
    if ( exists $self->{targetdb}{tables}{$targettable}{row_modify} ) {
        $rowmod = $self->{targetdb}{tables}{$targettable}{row_modify};
    }

    while ( my $arref = $sth->fetchrow_arrayref() ) {
        my $row = defined $rowmod ? $rowmod->($arref) : $arref;
        print {$tmpfile} join( "\t", @{$row} ), "\n";
    }

    return;
}
# Read table and column names of both databases into the object.
#
# For the target handle (stored under {targetdb}) and the source handle
# (stored under {sourcedb}) every table reported by "show tables" gets a
# fresh, empty entry under {tables}, and the column names reported by
# "show columns from <table>" are collected in order under {columns}.
# Because the entries are replaced, configuration made for a table before
# this call is discarded.
sub read_dbs {
    my $self = shift;

    for my $db_name ( 'targetdb', 'sourcedb' ) {
        my $dbh = $db_name eq 'targetdb'
            ? $self->target_dbh()
            : $self->source_dbh();

        my $sth = $dbh->prepare("show tables");
        $sth->execute();
        while ( my $arref = $sth->fetchrow_arrayref() ) {
            $self->{$db_name}{tables}{ $arref->[0] } = {};
        }

        my @tables = keys %{ $self->{$db_name}{tables} };
        for my $table (@tables) {
            $sth = $dbh->prepare("show columns from $table");
            $sth->execute();

            # the first field of each result row is the column name
            while ( my $arref = $sth->fetchrow_arrayref() ) {
                push( @{ $self->{$db_name}{tables}{$table}{columns} }, $arref->[0] );
            }
        }
    }

    return;
}
# Configure a target table to be filled from a differently named source
# table.
#   $oldtable - name of the table in the source database
#   $newtable - name of the table in the target database
# The source name is stored as the {link} of the target table.
sub table_rename {
    my $self = shift;
    my ( $oldtable, $newtable ) = @_;

    $self->{targetdb}{tables}{$newtable}{link} = $oldtable;
}
# Mark a target table as not to be filled.
#   $newtable - name of the table in the target database
# The marker is an empty string stored as the {link} of the target table.
sub table_skip {
    my $self = shift;
    my ($newtable) = @_;

    $self->{targetdb}{tables}{$newtable}{link} = "";
}
# Configure a target column to be filled from a differently named source
# column.
#   $newtable - name of the table in the target database
#   $oldcol   - name of the column in the source table
#   $newcol   - name of the column in the target table
# The source column name is stored under {columnrename}, keyed by the
# target column name.
sub column_rename {
    my $self = shift;
    my ( $newtable, $oldcol, $newcol ) = @_;

    $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = $oldcol;
}
# Mark a target column as having no source column.
#   $newtable - name of the table in the target database
#   $newcol   - name of the column in the target table
# The marker is an empty string stored under {columnrename} for the column.
sub column_skip {
    my $self = shift;
    my ( $newtable, $newcol ) = @_;

    $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = "";
}
# Set a custom select statement used to fill a target table.
#   $newtable - name of the table in the target database
#   $select   - the select statement, stored under {select}
sub custom_select {
    my $self = shift;
    my ( $newtable, $select ) = @_;

    $self->{targetdb}{tables}{$newtable}{select} = $select;
}
# Set the function that modifies each row before it is written for a
# target table.
#   $newtable     - name of the table in the target database
#   $row_modifier - code reference, stored under {row_modify}
sub set_row_modifier {
    my $self = shift;
    my ( $newtable, $row_modifier ) = @_;

    $self->{targetdb}{tables}{$newtable}{row_modify} = $row_modifier;
}
# Empty every known table of the target database by running
# "delete from <table>" on the target handle.
sub clear_target {
    my ($self) = @_;

    foreach my $name ( keys %{ $self->{targetdb}{tables} } ) {
        my $statement = "delete from $name";
        $self->target_dbh()->do($statement);
    }
}
# script to convert 120 database to 130 database
# uses SchemaConvert
# see doc there
#
# Connects to the filled 120 source database and the empty 130 target
# schema, configures table and column renames, skips and custom selects on
# a SchemaConverter object and finally runs the transfer.
use strict;
use warnings;

# NOTE(review): the package used below is called SchemaConverter; confirm
# that the module file really is SchemaConvert.pm.
use SchemaConvert;
use DBI;

my $sourcedbh = DBI->connect("dbi:mysql:host=ensrv3.sanger.ac.uk;database=homo_sapiens_core_120", "ensro")
    or die "Cannot connect to source database: $DBI::errstr";
my $targetdbh = DBI->connect("dbi:mysql:host=ecs1f.sanger.ac.uk;database=arne_ens130", "ensadmin", "ensembl")
    or die "Cannot connect to target database: $DBI::errstr";

my $sc = SchemaConverter->new( $sourcedbh, $targetdbh );
$sc->tmp_dir( "/work1/stabenau/db" );

$sc->custom_select( "assembly", "select s.fpcctg_name, c.chromosome_id, s.raw_id, s.chr_start, s.chr_end, s.fpcctg_start, s.fpcctg_end, s.raw_start, s.raw_end, s.raw_ori, s.type from chromosome c, static_golden_path s where s.chr_name = c.name" );

$sc->table_rename( "analysisprocess", "analysis" );
# column_rename takes ( table, old column, new column ) - no database argument
$sc->column_rename( "analysis", "analysisId", "analysis_id" );

$sc->column_rename( "contig", "internal_id", "contig_id" );
$sc->column_rename( "contig", "id", "name" );
$sc->column_rename( "contig", "clone", "clone_id" );
$sc->column_rename( "contig", "dna", "dna_id" );
$sc->column_rename( "contig", "chromosomeId", "chromosome_id" );
$sc->column_rename( "contig", "international_id", "international_name" );

$sc->column_rename( "clone", "internal_id", "clone_id" );
$sc->column_rename( "clone", "id", "name" );
$sc->column_rename( "clone", "embl_id", "embl_acc" );

$sc->custom_select( "map_density", "select c.chromosome_id, m.chr_start, m.chr_end, m.type, m.value from chromosome c, map_density m where m.chr_name = c.name" );

$sc->column_rename( "dna", "id", "dna_id" );

# target tables that are not filled by this conversion
$sc->table_skip( "exon_feature" );
$sc->table_skip( "simple_feature" );
$sc->table_skip( "dna_align_feature" );
$sc->table_skip( "protein_align_feature" );
$sc->table_skip( "repeat_feature" );
$sc->table_skip( "repeat" );
$sc->table_skip( "assembly_locations" );

$sc->table_rename( "objectXref", "object_xref" );
$sc->column_rename( "object_xref", "objectxrefId","object_xref_id" );
$sc->column_rename( "object_xref", "xrefId", "xref_id" );

$sc->table_rename( "identityXref", "identity_xref" );
$sc->column_rename( "identity_xref", "objectxrefId","object_xref_id" );

$sc->table_rename( "Xref", "xref" );
$sc->column_rename( "xref", "xrefId", "xref_id" );
$sc->column_rename( "xref", "externalDBId", "external_db_id" );
$sc->column_rename( "xref", "dbprimary_id", "dbprimary_acc" );
$sc->column_rename( "xref", "display_id", "display_label" );

$sc->table_rename( "externalSynonym", "external_synonym" );
$sc->column_rename( "external_synonym", "xrefId", "xref_id" );

$sc->table_rename( "externalDB", "external_db" );
$sc->column_rename( "external_db", "externalDBId", "external_db_id" );

$sc->column_skip( "supporting_feature", "contig_id" );

$sc->column_rename( "protein_feature", "id", "protein_feature_id" );
$sc->column_rename( "protein_feature", "translation", "translation_id" );
$sc->column_rename( "protein_feature", "analysis", "analysis_id" );
$sc->column_rename( "protein_feature", "hstart", "hit_start" );
$sc->column_rename( "protein_feature", "hend", "hit_end" );
$sc->column_rename( "protein_feature", "hid", "hit_id" );
$sc->column_rename( "protein_feature", "perc_id", "perc_ident" );

$sc->column_rename( "exon", "seq_start", "contig_start" );
$sc->column_rename( "exon", "seq_end", "contig_end" );
$sc->column_rename( "exon", "strand", "contig_strand" );

$sc->column_rename( "gene", "analysisId", "analysis_id" );

$sc->table_rename( "repeat_feature", "r_feature" );

$sc->column_rename( "supporting_feature", "hid", "hit_id" );
$sc->column_rename( "supporting_feature", "hstart", "hit_start" );
$sc->column_rename( "supporting_feature", "hend", "hit_end" );
$sc->column_rename( "supporting_feature", "seq_start", "contig_start" );
$sc->column_rename( "supporting_feature", "seq_end", "contig_end" );
$sc->column_rename( "supporting_feature", "hstrand", "hit_strand" );
$sc->column_rename( "supporting_feature", "analysis", "analysis_id" );

$sc->transfer();
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment