diff --git a/misc-scripts/surgery/SchemaConverter.pm b/misc-scripts/surgery/SchemaConverter.pm
new file mode 100644
index 0000000000000000000000000000000000000000..5bcc8a140450260a1a2c57422b59d64955fceca5
--- /dev/null
+++ b/misc-scripts/surgery/SchemaConverter.pm
@@ -0,0 +1,293 @@
+# Module to ease mysql schema conversion
+# Author: Arne Stabenau
+#
+# Usage:
+#
+# Make a SchemaConverter with new( source_dbh, target_dbh ).
+# The source database should be filled, the target is an empty schema.
+# Set the directory for the dump files with tmp_dir( "dirname" ).
+#
+# For each target table there will be one transfer, in this order of
+# precedence:
+#   with a self specified transfer function
+#   skipped, if the table was given to table_skip
+#   with a custom select
+#   from a renamed source table
+#   from the source table with the same name
+#
+# Configure the table mapping with
+#
+#   table_rename( "oldname", "newname" )
+#   table_skip( "tablename" )
+#
+# A standard table transfer either runs the custom select statement or
+# transfers columns with the same name, or renamed ones, into each other.
+# Target columns without a source column have to be skipped explicitly,
+# otherwise transfer dies.
+#
+#   column_rename( "tablename", "oldcolname", "newcolname" )
+#   column_skip( "tablename", "colname" )
+#   custom_select( "tablename", "select statement" )
+#
+# Each row may be modified (custom select or standard select) by a row
+# modifier function for the target table. It takes a list ref and returns
+# a list ref with the modified values. You have to know the order of the
+# columns coming in and the order they have to go out to the database.
+#
+#   set_row_modifier( "tablename", function_reference )
+
+package SchemaConverter;
+
+use strict;
+use warnings;
+use DBI;
+use Data::Dumper;
+
+
+# Constructor. Takes the source and the target database handle and reads
+# the table and column names of both databases.
+sub new {
+    my ( $class, @args ) = @_;
+
+    my $self = {};
+    bless $self, $class;
+
+    $self->source_dbh( $args[0] );
+    $self->target_dbh( $args[1] );
+
+    $self->read_dbs();
+
+    return $self;
+}
+
+# Get or set the directory the dump files are written to.
+sub tmp_dir {
+    my ( $self, $arg ) = @_;
+
+    $self->{'tmp_dir'} = $arg if defined $arg;
+
+    return $self->{'tmp_dir'};
+}
+
+# Get or set the database handle of the source database.
+sub source_dbh {
+    my ( $self, $arg ) = @_;
+
+    $self->{'source_dbh'} = $arg if defined $arg;
+
+    return $self->{'source_dbh'};
+}
+
+# Get or set the database handle of the target database.
+sub target_dbh {
+    my ( $self, $arg ) = @_;
+
+    $self->{'target_dbh'} = $arg if defined $arg;
+
+    return $self->{'target_dbh'};
+}
+
+# Disconnect the source and the target database handle.
+sub close_dbh {
+    my $self = shift;
+
+    $self->source_dbh()->disconnect();
+    $self->target_dbh()->disconnect();
+}
+
+
+# Prepare and execute $sql on $dbh and return the statement handle.
+# Dies with the DBI error message if one of the two steps fails.
+sub _run_query {
+    my ( $dbh, $sql ) = @_;
+
+    my $sth = $dbh->prepare( $sql )
+        or die "Couldnt prepare '$sql': " . $dbh->errstr() . "\n";
+    $sth->execute()
+        or die "Couldnt execute '$sql': " . $sth->errstr() . "\n";
+
+    return $sth;
+}
+
+
+# Fill all tables of the target database. Every table is written as tab
+# separated text to tmp_dir/tablename.txt, loaded from there with
+# "load data infile" and the dump file is removed again.
+# Disconnects both databases and dies if no tmp_dir is set.
+sub transfer {
+    my $self = shift;
+
+    my $tmpdir = $self->tmp_dir();
+    if( ! defined $tmpdir ) {
+        $self->close_dbh();
+        die( "No tmp_dir specified" );
+    }
+
+    for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
+        my $tableconf = $self->{targetdb}{tables}{$tablename};
+        my $dumpfile = "$tmpdir/$tablename.txt";
+        my $skip = 0;
+        print STDERR "Transfer $tablename ";
+
+        open( my $fh, '>', $dumpfile )
+            or die "cant open dumpfile $dumpfile: $!";
+
+        if( exists $tableconf->{transfer} ) {
+            # self specified transfer function writes the dump file
+            my $transfunc = $tableconf->{transfer};
+            $transfunc->( $self->source_dbh(), $self->target_dbh(), $tablename, $fh );
+        } else {
+            my $sourcetable;
+            if( exists $tableconf->{link} ) {
+                $sourcetable = $tableconf->{link};
+                # table_skip stores an empty source table name
+                $skip = 1 if $sourcetable eq "";
+            } elsif( exists $tableconf->{select} ) {
+                # if we have custom select, sourcetable doesnt make sense
+                $sourcetable = undef;
+            } elsif( ! exists $self->{sourcedb}{tables}{$tablename} ) {
+                die "Couldnt find source for $tablename. Enter empty sourcetable.";
+            } else {
+                $sourcetable = $tablename;
+            }
+
+            if( ! $skip ) {
+                $self->standard_table_transfer( $sourcetable, $tablename, $fh );
+            }
+        }
+
+        # buffered write errors only show up here
+        close( $fh ) or die "cant close dumpfile $dumpfile: $!";
+
+        if( ! $skip ) {
+            $self->target_dbh()->do( "load data infile '$dumpfile' into table $tablename" )
+                or die "Couldnt load $dumpfile into $tablename: " . $self->target_dbh()->errstr() . "\n";
+        }
+
+        unlink $dumpfile;
+        print STDERR " finished\n";
+    }
+}
+
+
+# Write the rows for $targettable as tab separated text to the open file
+# handle $tmpfile. Uses the custom select of the target table if there is
+# one, otherwise selects from $sourcetable one column per target column,
+# following column_rename and column_skip. Dies if a target column cannot
+# be filled. Rows go through the row modifier of the table if one is set.
+sub standard_table_transfer {
+    my ( $self, $sourcetable, $targettable, $tmpfile ) = @_;
+
+    my $tableconf = $self->{targetdb}{tables}{$targettable};
+
+    # look for custom select
+    my $select;
+    if( exists $tableconf->{select} ) {
+        $select = $tableconf->{select};
+    } else {
+        my $newcols = $tableconf->{columns}
+            or die "No columns known for target table $targettable\n";
+        my $oldcols = $self->{sourcedb}{tables}{$sourcetable}{columns}
+            or die "No columns known for source table $sourcetable\n";
+        my %is_oldcol = map { $_ => 1 } @{$oldcols};
+        my %rename = %{ $tableconf->{columnrename} || {} };
+
+        # find all source columns and build select statement
+        my @selnames;
+        for my $colname ( @{$newcols} ) {
+            if( exists $rename{$colname} ) {
+                # column_skip stores an empty source column name
+                my $selname = $rename{$colname};
+                push( @selnames, $selname eq "" ? "NULL" : $selname );
+            } elsif( $is_oldcol{$colname} ) {
+                push( @selnames, $colname );
+            } else {
+                die "Couldnt fill $targettable.$colname\n";
+            }
+        }
+        $select = "SELECT " . join( ", ", @selnames ) . " from $sourcetable";
+    }
+
+    my $sth = _run_query( $self->source_dbh(), $select );
+    my $rowmod = $tableconf->{row_modify};
+
+    # NULL values arrive as undef and are written as empty fields
+    no warnings 'uninitialized';
+    while( my $arref = $sth->fetchrow_arrayref() ) {
+        my $row = $rowmod ? $rowmod->( $arref ) : $arref;
+        print {$tmpfile} join( "\t", @{$row} ), "\n";
+    }
+}
+
+
+# Read the table names and the column names of every table from the target
+# and the source database into $self->{targetdb} and $self->{sourcedb}.
+sub read_dbs {
+    my $self = shift;
+
+    for my $db_name ( 'targetdb', 'sourcedb' ) {
+        my $dbh = ( $db_name eq 'targetdb' )
+            ? $self->target_dbh()
+            : $self->source_dbh();
+
+        my $sth = _run_query( $dbh, "show tables" );
+        while( my $arref = $sth->fetchrow_arrayref() ) {
+            $self->{$db_name}{tables}{$arref->[0]} = {};
+        }
+
+        for my $table ( keys %{$self->{$db_name}{tables}} ) {
+            $sth = _run_query( $dbh, "show columns from $table" );
+            while( my $arref = $sth->fetchrow_arrayref() ) {
+                push( @{$self->{$db_name}{tables}{$table}{columns}}, $arref->[0] );
+            }
+        }
+    }
+}
+
+
+# Fill target table $newtable from source table $oldtable.
+sub table_rename {
+    my ( $self, $oldtable, $newtable ) = @_;
+    $self->{targetdb}{tables}{$newtable}{link} = $oldtable;
+}
+
+# Do not transfer anything into target table $newtable.
+sub table_skip {
+    my ( $self, $newtable ) = @_;
+    $self->{targetdb}{tables}{$newtable}{link} = "";
+}
+
+# Fill column $newcol of target table $newtable from source column $oldcol.
+sub column_rename {
+    my ( $self, $newtable, $oldcol, $newcol ) = @_;
+    $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = $oldcol;
+}
+
+# Select NULL instead of a source column for column $newcol of $newtable.
+sub column_skip {
+    my ( $self, $newtable, $newcol ) = @_;
+    $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = "";
+}
+
+# Fill target table $newtable with the result of the statement $select.
+sub custom_select {
+    my ( $self, $newtable, $select ) = @_;
+    $self->{targetdb}{tables}{$newtable}{select} = $select;
+}
+
+# Pass every row for target table $newtable through $row_modifier, a
+# function that takes a list ref and returns the modified list ref.
+sub set_row_modifier {
+    my ( $self, $newtable, $row_modifier ) = @_;
+    $self->{targetdb}{tables}{$newtable}{row_modify} = $row_modifier;
+}
+
+# Delete all rows from every table of the target database.
+sub clear_target {
+    my $self = shift;
+    for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
+        $self->target_dbh()->do( "delete from $tablename" );
+    }
+}
+
+1;
diff --git a/misc-scripts/surgery/convert120_130.pl b/misc-scripts/surgery/convert120_130.pl
new file mode 100644
index 0000000000000000000000000000000000000000..bb02d2094c89335148c1415c6fbc8ed69af689ac
--- /dev/null
+++ b/misc-scripts/surgery/convert120_130.pl
@@ -0,0 +1,94 @@
+# script to convert 120 database to 130 database
+# uses SchemaConverter
+# see doc there
+
+
+use strict;
+use warnings;
+use SchemaConverter;
+use DBI;
+
+my $sourcedbh = DBI->connect("dbi:mysql:host=ensrv3.sanger.ac.uk;database=homo_sapiens_core_120", "ensro")
+    or die "Couldnt connect to source database: $DBI::errstr\n";
+my $targetdbh = DBI->connect("dbi:mysql:host=ecs1f.sanger.ac.uk;database=arne_ens130", "ensadmin", "ensembl")
+    or die "Couldnt connect to target database: $DBI::errstr\n";
+
+my $sc = SchemaConverter->new( $sourcedbh, $targetdbh );
+$sc->tmp_dir( "/work1/stabenau/db" );
+
+
+$sc->custom_select( "assembly", "select s.fpcctg_name, c.chromosome_id, s.raw_id, s.chr_start, s.chr_end, s.fpcctg_start, s.fpcctg_end, s.raw_start, s.raw_end, s.raw_ori, s.type from chromosome c, static_golden_path s where s.chr_name = c.name" );
+
+$sc->table_rename( "analysisprocess", "analysis" );
+$sc->column_rename( "analysis", "analysisId", "analysis_id" );
+
+$sc->column_rename( "contig", "internal_id", "contig_id" );
+$sc->column_rename( "contig", "id", "name" );
+$sc->column_rename( "contig", "clone", "clone_id" );
+$sc->column_rename( "contig", "dna", "dna_id" );
+$sc->column_rename( "contig", "chromosomeId", "chromosome_id" );
+$sc->column_rename( "contig", "international_id", "international_name" );
+
+$sc->column_rename( "clone", "internal_id", "clone_id" );
+$sc->column_rename( "clone", "id", "name" );
+$sc->column_rename( "clone", "embl_id", "embl_acc" );
+
+$sc->custom_select( "map_density", "select c.chromosome_id, m.chr_start, m.chr_end, m.type, m.value from chromosome c, map_density m where m.chr_name = c.name" );
+
+
+$sc->column_rename( "dna", "id", "dna_id" );
+
+$sc->table_skip( "exon_feature" );
+$sc->table_skip( "simple_feature" );
+$sc->table_skip( "dna_align_feature" );
+$sc->table_skip( "protein_align_feature" );
+$sc->table_skip( "repeat_feature" );
+$sc->table_skip( "repeat" );
+$sc->table_skip( "assembly_locations" );
+
+$sc->table_rename( "objectXref", "object_xref" );
+$sc->column_rename( "object_xref", "objectxrefId","object_xref_id" );
+$sc->column_rename( "object_xref", "xrefId", "xref_id" );
+
+$sc->table_rename( "identityXref", "identity_xref" );
+$sc->column_rename( "identity_xref", "objectxrefId","object_xref_id" );
+
+$sc->table_rename( "Xref", "xref" );
+$sc->column_rename( "xref", "xrefId", "xref_id" );
+$sc->column_rename( "xref", "externalDBId", "external_db_id" );
+$sc->column_rename( "xref", "dbprimary_id", "dbprimary_acc" );
+$sc->column_rename( "xref", "display_id", "display_label" );
+
+$sc->table_rename( "externalSynonym", "external_synonym" );
+$sc->column_rename( "external_synonym", "xrefId", "xref_id" );
+
+$sc->table_rename( "externalDB", "external_db" );
+$sc->column_rename( "external_db", "externalDBId", "external_db_id" );
+
+$sc->column_skip( "supporting_feature", "contig_id" );
+
+$sc->column_rename( "protein_feature", "id", "protein_feature_id" );
+$sc->column_rename( "protein_feature", "translation", "translation_id" );
+$sc->column_rename( "protein_feature", "analysis", "analysis_id" );
+$sc->column_rename( "protein_feature", "hstart", "hit_start" );
+$sc->column_rename( "protein_feature", "hend", "hit_end" );
+$sc->column_rename( "protein_feature", "hid", "hit_id" );
+$sc->column_rename( "protein_feature", "perc_id", "perc_ident" );
+
+$sc->column_rename( "exon", "seq_start", "contig_start" );
+$sc->column_rename( "exon", "seq_end", "contig_end" );
+$sc->column_rename( "exon", "strand", "contig_strand" );
+
+$sc->column_rename( "gene", "analysisId", "analysis_id" );
+
+$sc->table_rename( "repeat_feature", "r_feature" );
+
+$sc->column_rename( "supporting_feature", "hid", "hit_id" );
+$sc->column_rename( "supporting_feature", "hstart", "hit_start" );
+$sc->column_rename( "supporting_feature", "hend", "hit_end" );
+$sc->column_rename( "supporting_feature", "seq_start", "contig_start" );
+$sc->column_rename( "supporting_feature", "seq_end", "contig_end" );
+$sc->column_rename( "supporting_feature", "hstrand", "hit_strand" );
+$sc->column_rename( "supporting_feature", "analysis", "analysis_id" );
+
+$sc->transfer();