Commit aba8ca2c authored by Patrick Meidl's avatar Patrick Meidl
Browse files

changes from branch-vega-30-dev

parent cbfefbe1
......@@ -11,7 +11,7 @@ index dumps) for Vega.
./glovar_snp_density.pl
--species=Homo_sapiens
[--chr=6,13,14]
[--dry-run|-n]
[--dry_run|-n]
[--avdump|-a]
[--help|-h]
......@@ -65,7 +65,7 @@ my ($species, $chr, $dry, $avdump, $help);
&GetOptions(
"species=s" => \$species,
"chr=s" => \$chr,
"dry-run" => \$dry,
"dry_run" => \$dry,
"n" => \$dry,
"avdump" => \$avdump,
"a" => \$avdump,
......@@ -78,7 +78,7 @@ if($help || !$species){
./glovar_snp_density.pl
--species=Homo_sapiens
[--chr=6,13,14]
[--dry-run|-n]
[--dry_run|-n]
[--avdump|-a]
[--help|-h]\n\n);
exit;
......
......@@ -9,7 +9,7 @@ Wrapper for glovar_snp_density.pl
./glovar_snp_density.pl
--species=Homo_sapiens
[--dry-run|-n]
[--dry_run|-n]
[--avdump|-a]
=head1 DESCRIPTION
......@@ -53,7 +53,7 @@ use Getopt::Long;
my ($species, $dry, $avdump);
&GetOptions(
"species=s" => \$species,
"dry-run" => \$dry,
"dry_run" => \$dry,
"n" => \$dry,
"avdump" => \$avdump,
"a" => \$avdump,
......@@ -64,7 +64,7 @@ unless ($species) {
./glovar_snp_density.pl
--species=Homo_sapiens
[--avdump|-a]
[--dry-run|-n]\n\n);
[--dry_run|-n]\n\n);
exit;
}
......
......@@ -9,7 +9,7 @@ script to calculate gene densities and stats in Vega
./vega_gene_density.pl
--species=Homo_sapiens
[--dry-run|-n]
[--dry_run|-n]
[--help|-h]
=head1 DESCRIPTION
......@@ -63,7 +63,7 @@ my ($species, $chr, $dry, $help);
&GetOptions(
"species=s" => \$species,
"chr=s" => \$chr,
"dry-run" => \$dry,
"dry_run" => \$dry,
"n" => \$dry,
"help" => \$help,
"h" => \$help,
......@@ -74,7 +74,7 @@ if($help || !$species){
./vega_gene_density.pl
--species=Homo_sapiens
[--chr=1,2]
[--dry-run|-n]
[--dry_run|-n]
[--help|-h]\n\n);
exit;
}
......@@ -125,6 +125,8 @@ for my $slice ( @top_slices ) {
}
$big_block_size = int( $min_big_chr / 150 );
#use this size if another human is needed
#$big_block_size = 28050;
push @{$big_chr}, $big_block_size;
$small_block_size = int( $min_small_chr / 150 );
push @{$small_chr}, $small_block_size;
......
......@@ -31,7 +31,7 @@ use Data::Dumper;
my ($species, $dry, $help);
&GetOptions(
"species=s" => \$species,
"dry-run" => \$dry,
"dry_run" => \$dry,
"n" => \$dry,
"help" => \$help,
"h" => \$help,
......@@ -41,7 +41,7 @@ if($help || !$species){
print qq(Usage:
./vega_gene_density.pl
--species=Homo_sapiens
[--dry-run|-n]
[--dry_run|-n]
[--help|-h]\n\n);
exit;
}
......
......@@ -32,7 +32,7 @@ use Data::Dumper;
my ($species, $dry, $help);
&GetOptions(
"species=s" => \$species,
"dry-run" => \$dry,
"dry_run" => \$dry,
"n" => \$dry,
"help" => \$help,
"h" => \$help,
......@@ -42,7 +42,7 @@ if($help || !$species){
print qq(Usage:
./vega_gene_density.pl
--species=Homo_sapiens
[--dry-run|-n]
[--dry_run|-n]
[--help|-h]\n\n);
exit;
}
......
......@@ -56,13 +56,13 @@ sub new {
$self->debug("Building schema for $target from $schema");
die "Cannot open $schema" if (! -e $schema);
my $cmd = "mysql -u $user -p$pass -P $port -h $host $target < $schema";
my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $schema";
system ($cmd);
if ($vega_schema) {
$self->debug("Adding vega tables for $target");
die "Cannot open vega creation script" if (! -e $vega_schema);
my $cmd = "mysql -u $user -p$pass -P $port -h $host $target < $vega_schema";
my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $vega_schema";
system ($cmd);
}
......@@ -851,7 +851,7 @@ sub transfer_features {
" contig_strand, analysis_id, repeat_start, repeat_end, " .
" repeat_consensus_id, score FROM $source.repeat_feature $limit");
$self->debug("Readding indexes on repeat_feature");
$self->debug("Reading indexes on repeat_feature");
$dbh->do("ALTER TABLE $target.repeat_feature " .
"ADD INDEX seq_region_idx( seq_region_id, seq_region_start)");
$dbh->do("ALTER TABLE $target.repeat_feature " .
......@@ -875,7 +875,7 @@ sub transfer_features {
" hit_end, hit_name, cigar_line, evalue, perc_ident, score ".
"FROM $source.protein_align_feature $limit");
$self->debug("Readding indexes on protein_align_feature");
$self->debug("Reading indexes on protein_align_feature");
$dbh->do( qq{ALTER TABLE $target.protein_align_feature
ADD index seq_region_idx( analysis_id, seq_region_id,
seq_region_start, score )});
......@@ -899,7 +899,7 @@ sub transfer_features {
" perc_ident, score FROM $source.dna_align_feature $limit");
$self->debug("Readding indexes on dna_align_feature");
$self->debug("Reading indexes on dna_align_feature");
$dbh->do( qq{ALTER TABLE $target.dna_align_feature
ADD INDEX seq_region_idx(seq_region_id, analysis_id,
seq_region_start, score)});
......
......@@ -8,7 +8,7 @@ use SeqStoreConverter::vega::VBasicConverter;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::vega::VBasicConverter SeqStoreConverter::CanisFamiliaris);
@ISA = qw(SeqStoreConverter::CanisFamiliaris SeqStoreConverter::vega::VBasicConverter);
1;
......@@ -7,7 +7,7 @@ use SeqStoreConverter::DanioRerio;
use SeqStoreConverter::vega::VBasicConverter;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::vega::VBasicConverter SeqStoreConverter::DanioRerio);
@ISA = qw( SeqStoreConverter::DanioRerio SeqStoreConverter::vega::VBasicConverter);
sub copy_internal_clone_names {
my $self = shift;
......@@ -20,7 +20,7 @@ sub copy_internal_clone_names {
#get id for 'fpc_clone_id' attribute
$dbh->do("INSERT INTO $target.attrib_type (code,name,description)".
"values ('fpc_clone_id','fpc clone','clone id used for linking to Zebrafish webFPC')");
"values ('fpc_clone_id','fpc clone','clone id used for linking to Zebrafish webFPC')");
my ($attrib_id) = $dbh->selectrow_array("Select attrib_type_id from $target.attrib_type where code = 'fpc_clone_id'");
warn "No attrib id found\n" unless defined($attrib_id);
......@@ -38,69 +38,127 @@ sub copy_internal_clone_names {
my $insert_sth = $dbh->prepare("insert into $target.seq_region_attrib values (?,$attrib_id,?)");
while ($select1_sth->fetch()) {
$embl_name =~ s/([\d\w]+).*/$1/;
$select2_sth->bind_param(1,$embl_name);
$select2_sth->execute;
$insert_sth->bind_param(1,$seq_region_id);
while (my ($clone_name) = $select2_sth->fetchrow_array()) {
$insert_sth->bind_param(2,$clone_name);
$insert_sth->execute();
}
}
$embl_name =~ s/([\d\w]+).*/$1/;
$select2_sth->bind_param(1,$embl_name);
$select2_sth->execute;
$insert_sth->bind_param(1,$seq_region_id);
while (my ($clone_name) = $select2_sth->fetchrow_array()) {
$insert_sth->bind_param(2,$clone_name);
$insert_sth->execute();
}
}
}
# Danio-specific conversion of the clone_info and current_clone_info tables
# in the target database: every clone_id is replaced by the seq_region_id of
# the target seq_region whose name equals the source contig name, then the
# column is renamed to seq_region_id and given a unique index.
#
# Arguments: $self (converter object), $target_cs_name (read from @_ but not
# referenced anywhere in this body).
# Relies on $self->target(), $self->source() and $self->dbh(); the database
# names they return are interpolated directly into the SQL below.
sub update_clone_info {
my $self = shift;
my $target_cs_name = shift;
my $target = $self->target();
my $source = $self->source();
my $dbh = $self->dbh();
$self->debug("Vega Danio_specific - Transforming clone_id into seq_region_id for clone_info and current_clone_info");
# Both tables get exactly the same treatment.
foreach my $table_name ('clone_info','current_clone_info') {
# Step 1: collect (contig name, clone_id) pairs from the source database,
# skipping contigs whose names start with 'ctg' or 'NA'.
my $select_st1 =
"SELECT ctg.name, ctg.clone_id " .
"FROM $source.contig ctg, $source.$table_name ci " .
"WHERE ctg.clone_id = ci.clone_id " .
"AND ctg.name not like 'ctg%' " .
"AND ctg.name not like 'NA%'";
my $query_results1 = $dbh->selectall_arrayref($select_st1);
# $i tracks the index of the row currently being extended.
my $i = 0;
# Step 2: look up the target seq_region_id for each contig name and append
# it to the row, so each row becomes [name, clone_id, seq_region_id].
foreach my $contig_name (@$query_results1) {
my $embl_acc = $contig_name->[0];
# NOTE(review): the name is interpolated into the SQL rather than bound as
# a placeholder - confirm names can never contain a quote character.
my $select_st2 =
"SELECT sr.seq_region_id " .
"FROM $target.seq_region sr " .
"WHERE sr.name = '$embl_acc'";
# selectrow_array yields the first matching row only; if nothing matches,
# nothing is appended and element [2] of the row stays undefined.
my @query_results2 = $dbh->selectrow_array($select_st2);
push @{$query_results1->[$i]},@query_results2;
$i++;
}
# Step 3: rewrite clone_id in place, one UPDATE per collected row.
# NOTE(review): because old and new ids share one column, a new
# seq_region_id that equals a not-yet-processed clone_id would be rewritten
# again by a later iteration - confirm the two id ranges cannot overlap.
foreach my $clone (@$query_results1) {
my $seq_reg_id = $clone->[2];
my $clone_id = $clone->[1];
my $update_query =
"UPDATE $target.$table_name " .
"SET clone_id = '$seq_reg_id' " .
"WHERE clone_id = '$clone_id'";
$dbh->do($update_query);
}
# Step 4: the column now holds seq_region ids, so rename it accordingly
# and enforce uniqueness.
my $alter_struct_1 =
"ALTER table $target.$table_name " .
"CHANGE clone_id seq_region_id int(10) not null";
my $alter_struct_2 =
"ALTER table $target.$table_name " .
"add unique index (seq_region_id)";
$dbh->do($alter_struct_1);
$dbh->do($alter_struct_2);
}
}
my $self = shift;
my $target_cs_name = shift;
my $target = $self->target();
my $source = $self->source();
my $dbh = $self->dbh();
# clone_info, current_clone_info
$self->debug("Vega Danio_specific - Transforming clone_id into seq_region_id for clone_info and current_clone_info");
foreach my $table_name ('clone_info','current_clone_info') {
my $select_st1 =
"SELECT ctg.name, ctg.clone_id " .
"FROM $source.contig ctg, $source.$table_name ci " .
"WHERE ctg.clone_id = ci.clone_id " .
"AND ctg.name not like 'ctg%' " .
"AND ctg.name not like 'NA%'";
my $query_results1 = $dbh->selectall_arrayref($select_st1);
my $i = 0;
foreach my $contig_name (@$query_results1) {
my $embl_acc = $contig_name->[0];
my $select_st2 =
"SELECT sr.seq_region_id " .
"FROM $target.seq_region sr " .
"WHERE sr.name = '$embl_acc'";
my @query_results2 = $dbh->selectrow_array($select_st2);
push @{$query_results1->[$i]},@query_results2;
$i++;
}
foreach my $clone (@$query_results1) {
my $seq_reg_id = $clone->[2];
my $clone_id = $clone->[1];
my $update_query =
"UPDATE $target.$table_name " .
"SET clone_id = '$seq_reg_id' " .
"WHERE clone_id = '$clone_id'";
$dbh->do($update_query);
}
my $alter_struct_1 =
"ALTER table $target.$table_name " .
"CHANGE clone_id seq_region_id int(10) not null";
my $alter_struct_2 =
"ALTER table $target.$table_name " .
"add unique index (seq_region_id)";
$dbh->do($alter_struct_1);
$dbh->do($alter_struct_2);
}
# assembly_tag
$self->debug("Vega Danio_specific - Transforming contig_id into seq_region_id for assembly_tag");
# first remove orphans from assembly_tag table (i.e. entries pointing to
# non-existing contigs)
my $numrows = $dbh->do(qq(
DELETE at
FROM $source.assembly_tag at
LEFT JOIN $source.contig c ON c.contig_id = at.contig_id
WHERE c.contig_id IS NULL
));
$self->debug(" Deleted $numrows orphans from assembly_tag");
my $select_st3 =
"SELECT ctg.name, ctg.contig_id " .
"FROM $source.contig ctg, $source.assembly_tag at " .
"WHERE ctg.contig_id = at.contig_id " .
"AND ctg.name not like 'ctg%' " .
"AND ctg.name not like 'NA%'";
my $query_results3 = $dbh->selectall_arrayref($select_st3);
my $j = 0;
foreach my $contig_name (@$query_results3) {
my $embl_acc = $contig_name->[0];
my $select_st4 =
"SELECT sr.seq_region_id " .
"FROM $target.seq_region sr " .
"WHERE sr.name = '$embl_acc'";
my @query_results4 = $dbh->selectrow_array($select_st4);
push @{$query_results3->[$j]}, @query_results4;
$j++;
}
foreach my $contig (@$query_results3) {
my $seq_reg_id = $contig->[2];
my $contig_id = $contig->[1];
my $update_query =
"UPDATE $target.assembly_tag " .
"SET contig_id = '$seq_reg_id' " .
"WHERE contig_id = '$contig_id'";
$dbh->do($update_query);
}
$dbh->do(" ALTER TABLE $target.assembly_tag
CHANGE contig_id seq_region_id int(10) UNSIGNED NOT NULL");
$dbh->do(" ALTER TABLE $target.assembly_tag
CHANGE contig_start seq_region_start int(10)");
$dbh->do(" ALTER TABLE $target.assembly_tag
CHANGE contig_end seq_region_end int(10)");
$dbh->do(" ALTER TABLE $target.assembly_tag
CHANGE contig_strand seq_region_strand tinyint(1)");
}
1;
......@@ -8,6 +8,6 @@ use SeqStoreConverter::vega::VBasicConverter;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::vega::VBasicConverter SeqStoreConverter::HomoSapiens);
@ISA = qw(SeqStoreConverter::HomoSapiens SeqStoreConverter::vega::VBasicConverter);
1;
......@@ -8,6 +8,6 @@ use SeqStoreConverter::vega::VBasicConverter;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::vega::VBasicConverter SeqStoreConverter::MusMusculus);
@ISA = qw(SeqStoreConverter::MusMusculus SeqStoreConverter::vega::VBasicConverter);
1;
package SeqStoreConverter::vega::VBasicConverter;
use strict;
use warnings;
use SeqStoreConverter::BasicConverter;
package SeqStoreConverter::vega::VBasicConverter;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::BasicConverter);
sub remove_supercontigs {
my $self = shift;
......@@ -18,62 +14,41 @@ sub remove_supercontigs {
$self->debug("Vega specific - removing supercontigs from $target");
$dbh->do("DELETE FROM $target.meta ".
"WHERE meta_value like '%supercontig%'");
"WHERE meta_value like '%supercontig%'");
$dbh->do("DELETE FROM $target.coord_system ".
"WHERE name like 'supercontig'");
"WHERE name like 'supercontig'");
$dbh->do("DELETE $target.a ".
"FROM $target.assembly a, $target.seq_region sr ".
"WHERE sr.coord_system_id = 2 ".
"and a.asm_seq_region_id = sr.seq_region_id");
"FROM $target.assembly a, $target.seq_region sr ".
"WHERE sr.coord_system_id = 2 ".
"and a.asm_seq_region_id = sr.seq_region_id");
$dbh->do("DELETE FROM $target.seq_region ".
"WHERE coord_system_id = 2");
"WHERE coord_system_id = 2");
}
sub copy_other_tables {
my $self = shift;
#xref tables
$self->copy_tables("xref",
"go_xref",
"identity_xref",
"object_xref",
"external_db",
"external_synonym",
#marker/qtl related tables
"map",
"marker",
"marker_synonym",
"qtl",
"qtl_synonym",
#misc other tables
"supporting_feature",
"analysis",
"exon_transcript",
"interpro",
"gene_description",
"protein_feature",
#vega tables
"gene_synonym",
"transcript_info",
"current_gene_info",
"current_transcript_info",
"author",
"gene_name",
"transcript_class",
"gene_remark",
"gene_info",
"evidence",
"transcript_remark",
"clone_remark",
"clone_info",
"clone_info_keyword",
"clone_lock");
$self->copy_current_clone_info;
# Hand the list of Vega annotation tables to copy_tables in a single call,
# then invoke copy_current_clone_info.  The value of that last call is what
# this method returns.
sub copy_other_vega_tables {
    my ($self) = @_;

    # vega tables, in the order they are passed to copy_tables
    my @vega_tables = qw(
        gene_synonym
        transcript_info
        current_gene_info
        current_transcript_info
        author
        gene_name
        transcript_class
        gene_remark
        gene_info
        evidence
        transcript_remark
        clone_remark
        clone_info
        clone_info_keyword
        assembly_tag
    );

    $self->copy_tables(@vega_tables);
    $self->copy_current_clone_info;
}
sub copy_current_clone_info {
......@@ -86,22 +61,64 @@ sub copy_current_clone_info {
$sth->finish();
}
# Rename the analysis with logic_name 'Genscan' to 'Vega_Genscan' in the
# target database's analysis table.
sub update_genscan {
    my ($self) = @_;

    $self->debug("Vega specific - updating analysis name for Genscans");

    my $target_db  = $self->target();
    my $rename_sql =
        "UPDATE $target_db.analysis set logic_name = 'Vega_Genscan' where logic_name = 'Genscan'";

    # Prepare, run and release the statement handle explicitly.
    my $dbh        = $self->dbh();
    my $update_sth = $dbh->prepare($rename_sql);
    $update_sth->execute();
    $update_sth->finish();
}
# Default update_clone_info hook for Vega converters: deliberately a no-op.
# Takes only the invocant and returns nothing (empty list in list context,
# undef in scalar context).
# NOTE(review): species-specific converters that need clone_info changes
# presumably override this method - confirm against the subclasses.
sub update_clone_info {
    my $self = shift;
    return;
}
# Default copy_internal_clone_names hook for Vega converters: deliberately a
# no-op.  Takes only the invocant and returns nothing (empty list in list
# context, undef in scalar context).
# NOTE(review): species-specific converters that carry internal clone names
# presumably override this method - confirm against the subclasses.
sub copy_internal_clone_names {
    my $self = shift;
    return;
}
# Vega-specific handling of the assembly_exception table.
#
# Steps, in order:
#   1. pass 'assembly_exception' to copy_tables;
#   2. rewrite seq_region_id and exc_seq_region_id in the target table using
#      the old_id -> new_id pairs held in the target's tmp_chr_map table;
#   3. for every seq_region that has assembly exceptions, raise
#      seq_region.length to the largest exception end coordinate when that
#      coordinate exceeds the stored length.
#
# Takes only the invocant.  No explicit return statement: the value of the
# final while loop is whatever callers receive.
# NOTE(review): tmp_chr_map is not created here - presumably an earlier
# conversion step builds it; confirm it still exists when this runs.
sub copy_assembly_exception {
my $self = shift;
# copy the assembly_exception table via the shared copy_tables helper
$self->debug('Vega specific - copying assembly_exception table');
$self->copy_tables('assembly_exception');
# NOTE(review): $source is fetched but never used in this sub.
my $source = $self->source();
my $target = $self->target();
my $dbh = $self->dbh();
# remap both seq_region id columns of assembly_exception from old ids to
# new ids, one multi-table UPDATE per column
$self->debug('Vega specific - Updating seq_region_id in assembly_exception table');
$dbh->do(qq(
UPDATE $target.assembly_exception, $target.tmp_chr_map
SET assembly_exception.seq_region_id = tmp_chr_map.new_id
WHERE assembly_exception.seq_region_id = tmp_chr_map.old_id
));
$dbh->do(qq(
UPDATE $target.assembly_exception, $target.tmp_chr_map
SET assembly_exception.exc_seq_region_id = tmp_chr_map.new_id
WHERE assembly_exception.exc_seq_region_id = tmp_chr_map.old_id
));
# fix seq_region.length if necessary (this is the case if you have an
# assembly_exception at the end of a chromosome)
# $sth1 is the writer: bound as (new length, seq_region_id).
my $sth1 = $dbh->prepare(qq(
UPDATE $target.seq_region SET length = ? WHERE seq_region_id = ?
));
# $sth2 is the reader: one row per seq_region that has exceptions, giving
# its id, its stored length and the largest exception end coordinate.
my $sth2 = $dbh->prepare(qq(
SELECT
sr.seq_region_id,
sr.length,
max(ae.seq_region_end)
FROM
$target.seq_region sr,
$target.assembly_exception ae
WHERE sr.seq_region_id = ae.seq_region_id
GROUP BY ae.seq_region_id
));
$sth2->execute;
while (my ($sr_id, $sr_length, $max_ae_length) = $sth2->fetchrow_array) {
# only ever grow the length; regions already long enough are untouched
if ($max_ae_length > $sr_length) {
$self->debug(" Updating seq_region.length for $sr_id (old $sr_length, new $max_ae_length)");
$sth1->execute($max_ae_length, $sr_id);
}
}
}
#HomoSapiens ecs1d mcvicker_old_schema mcvicker_new_schema
HomoSapiens ecs3:3307 mouse_vega040719_raw pm2_mouse_vega040719
#FuguRubripes ecs2f fugu_rubripes_core_18_2 mcvicker_new_fugu
CaenorhabditisElegans ecs2f caenorhabditis_elegans_core_18_102 mcvicker_new_elegans
#CaenorhabditisElegans ecs2f caenorhabditis_elegans_core_18_102 mcvicker_new_elegans
#CaenorhabditisBriggsae ecs2f caenorhabditis_briggsae_core_18_25 mcvicker_new_briggsae
#RattusNorvegicus ecs2f rattus_norvegicus_core_18_3 mcvicker_new_rat
#DrosophilaMelanogaster ecs2f drosophila_melanogaster_core_18_3a mcvicker_new_fly
......