Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
32449642
Commit
32449642
authored
23 years ago
by
Philip Lijnzaad
Browse files
Options
Downloads
Patches
Plain Diff
added expression
parent
a8386f14
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/utilities/satellite_dbdump_bychr.pl
+109
-38
109 additions, 38 deletions
misc-scripts/utilities/satellite_dbdump_bychr.pl
with
109 additions
and
38 deletions
misc-scripts/utilities/satellite_dbdump_bychr.pl
+
109
−
38
View file @
32449642
...
...
@@ -63,6 +63,9 @@ my $mysqldump = 'mysqldump'; # in $PATH we trust
# satellites:
my
$famdb
;
my
$diseaseb
;
my
$mapsdb
;
my
$expressiondb
;
my
$estdb
;
# end of satellites
&GetOptions
(
...
...
@@ -77,6 +80,8 @@ my $diseaseb;
'
family:s
'
=>
\
$famdb
,
'
disease:s
'
=>
\
$diseasedb
,
'
maps:s
'
=>
\
$mapsdb
,
'
expression:s
'
=>
\
$expressiondb
,
'
est:s
'
=>
\
$estdb
,
);
die
"
need a litedb; use -litedb something
"
unless
$litedb
;
...
...
@@ -88,52 +93,61 @@ if ($lim) {
$limit
=
"
limit
$lim
";
}
if
(
$famdb
)
{
my
$dumpdir
=
"
$workdir
/
$famdb
";
dump_schema
(
$famdb
,
$dumpdir
,
'
family.sql
');
&dump_family
(
$famdb
);
&dump_disease
(
$diseasedb
);
&dump_maps
(
$mapsdb
);
&dump_expression
(
$expressiondb
);
sub
dump_family
{
my
(
$satdb
)
=
@_
;
return
unless
$satdb
;
dump_schema
(
$satdb
);
my
$sql
;
$sql
=
"
SELECT distinct f.*
FROM
$
fam
db
.family f,
$litedb
.gene g
FROM
$
sat
db
.family f,
$litedb
.gene g
WHERE g.chr_name = '
$chr
'
and g.family = f.id
$limit
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
'
family
.dat
'
);
dump_data
(
$
sql
,
$satdb
,
'
family
'
);
$sql
=
"
SELECT fm.*
FROM
$
fam
db
.family_members fm,
$
fam
db
.family f,
$litedb
.gene g
FROM
$
sat
db
.family_members fm,
$
sat
db
.family f,
$litedb
.gene g
WHERE g.chr_name = '
$chr
'
and g.family = f.id
and f.internal_id = fm.family
$limit
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
'
family_members
.dat
'
);
dump_data
(
$
sql
,
$satdb
,
'
family_members
'
);
}
# family
if
(
$diseasedb
)
{
my
$dumpdir
=
"
$workdir
/
$diseasedb
";
dump_schema
(
$diseasedb
,
$dumpdir
,
'
disease.sql
');
sub
dump_disease
{
my
(
$satdb
)
=
@_
;
return
unless
$satdb
;
dump_schema
(
$satdb
);
# may need an ALTER TABLE gene ADD KEY(gene_symbol);
my
$sql
;
$sql
=
"
SELECT dg.*
FROM
$
disease
db
.gene dg,
FROM
$
sat
db
.gene dg,
$litedb
.gene lg,
$litedb
.gene_xref lgx
WHERE lg.chr_name = '
$chr
'
AND lg.gene = lgx.gene
AND lgx.display_id = dg.gene_symbol
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
'
gene
.dat
'
);
dump_data
(
$
sql
,
$satdb
,
'
gene
'
);
$sql
=
"
SELECT dd.*
FROM
$
disease
db
.gene dg,
$
disease
db
.disease dd,
FROM
$
sat
db
.gene dg,
$
sat
db
.disease dd,
$litedb
.gene lg,
$litedb
.gene_xref lgx
WHERE lg.chr_name = '
$chr
'
...
...
@@ -141,14 +155,14 @@ WHERE lg.chr_name = '$chr'
AND lgx.display_id = dg.gene_symbol
AND dd.id = dg.id;
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
'
disease
.dat
'
);
dump_data
(
$
sql
,
$satdb
,
'
disease
'
);
# here's the sql to restrict the disease_index_*list, but they're so small
# it's really not worth the trouble. Left here in case anyone is interested
# $sql = "
# SELECT ddl.*
# FROM $
disease
db.gene dg,
# $
disease
db.disease_index_doclist ddl,
# FROM $
sat
db.gene dg,
# $
sat
db.disease_index_doclist ddl,
# $litedb.gene lg,
# $litedb.gene_xref lgx
# WHERE lg.chr_name = '$chr'
...
...
@@ -159,15 +173,17 @@ WHERE lg.chr_name = '$chr'
foreach
my
$w
(
qw(doc stop vector word)
)
{
my
$table
=
"
disease_index_
${w}
list
";
$sql
=
"
select * from
$
disease
db
.
$table
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
"
$table
.dat
"
);
$sql
=
"
select * from
$
sat
db
.
$table
";
dump_data
(
$
sql
,
$satdb
,
$table
);
}
}
# disease
if
(
$mapsdb
)
{
# note: this will ignore non-RHdb markers; tough.
my
$dumpdir
=
"
$workdir
/
$mapsdb
";
dump_schema
(
$mapsdb
,
$dumpdir
,
'
maps.sql
');
sub
dump_maps
{
my
(
$satdb
)
=
@_
;
return
unless
$satdb
;
warn
"
ignoring non-RHdb markers !
\n
";
dump_schema
(
$satdb
);
my
$chr_short
=
$chr
;
$chr_short
=~
s/^chr//
;
...
...
@@ -177,40 +193,93 @@ if ($mapsdb) {
# the simple ones having a chromosome column:
foreach
my
$table
(
qw(ChromosomeBands CytogeneticMap RHMaps Fpc_Contig)
)
{
$sql
=
"
SELECT * FROM
$
maps
db
.
$table
WHERE chromosome = '
$chr_short
'
SELECT * FROM
$
sat
db
.
$table
WHERE chromosome = '
$chr_short
'
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
"
$table
.dat
"
);
dump_data
(
$
sql
,
$satdb
,
$table
);
}
$sql
=
"
SELECT * FROM
$
maps
db
.Map
";
# 4 rows
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
"
$table
.dat
"
);
$sql
=
"
SELECT * FROM
$
sat
db
.Map
";
# 4 rows
dump_data
(
$
sql
,
$satdb
,
$table
);
# less simple ones that can both use the RHMaps table
foreach
my
$table
(
qw(Marker MarkerSynonym)
)
{
$sql
=
"
SELECT t.*
FROM
$
maps
db
.
$table
t,
$
maps
db
.RHMaps r
FROM
$
sat
db
.
$table
t,
$
sat
db
.RHMaps r
WHERE t.marker=r.marker
AND r.chromosome = '
$chr_short
'
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
"
$table
.dat
"
);
dump_data
(
$
sql
,
$satdb
,
$table
);
}
# this one needs a join
$sql
=
"
SELECT cl.*
FROM
$
maps
db
.Fpc_Clone cl,
$
maps
db
.Fpc_Contig cg
FROM
$
sat
db
.Fpc_Clone cl,
$
sat
db
.Fpc_Contig cg
WHERE cg.chromosome = '
$chr_short
'
AND cl.contig_id = cg.contig_id
";
dump_data
(
$
litedb
,
$sql
,
$dumpdir
,
'
Fpc_Clone
.dat
'
);
dump_data
(
$
sql
,
$satdb
,
'
Fpc_Clone
'
);
}
# maps
sub
dump_expression
{
my
(
$satdb
)
=
@_
;
return
unless
$satdb
;
warn
"
ignoring any non-ENSG aliases
";
my
$dumpdir
=
"
$workdir
/
$satdb
";
dump_schema
(
$satdb
);
# small ones:
foreach
$table
(
qw(key_word lib_key library source )
)
{
$sql
=
"
select * from
$satdb
.
$table
";
dump_data
(
$sql
,
$satdb
,
$table
);
}
# frequency ;
# seqtag ;
# seqtag_alias ;
$sql
=
"
SELECT sa.*
FROM
$satdb
.seqtag_alias sa,
$litedb
.gene lg
WHERE sa.db_name = 'ensgene'
AND sa.external_name =lg.name
AND lg.chr_name = '
$chr
'
";
dump_data
(
$sql
,
$satdb
,
'
seqtag_alias
');
$sql
=
"
SELECT st.*
FROM
$satdb
.seqtag st,
$satdb
.seqtag_alias sa,
$litedb
.gene lg
WHERE sa.db_name = 'ensgene'
AND sa.external_name =lg.name
AND lg.chr_name = '
$chr
'
AND st.seqtag_id = sa.seqtag_id
";
dump_data
(
$sql
,
$satdb
,
'
seqtag
');
$sql
=
"
SELECT f.*
FROM
$satdb
.frequency f,
$satdb
.seqtag_alias sa,
$litedb
.gene lg
WHERE sa.db_name = 'ensgene'
AND sa.external_name =lg.name
AND lg.chr_name = '
$chr
'
AND f.seqtag_id = sa.seqtag_id
";
dump_data
(
$sql
,
$satdb
,
'
frequency
');
}
# expression
sub
dump_schema
{
my
(
$dbinstance
,
$destdir
,
$destfile
)
=
@_
;
my
(
$satdb
)
=
@_
;
my
$destdir
=
"
$workdir
/
$satdb
";
my
$destfile
=
"
$satdb
.sql
";
unless
(
-
d
$destdir
)
{
mkdir
$destdir
,
0755
||
die
"
mkdir
$destdir
: $!
";
...
...
@@ -218,16 +287,18 @@ sub dump_schema {
my
$d
=
"
$destdir
/
$destfile
";
warn
"
Dumping database schema of
$
dbinstance
to
$d
\n
";
warn
"
Dumping database schema of
$
satdb
to
$d
\n
";
die
"
$d
exists
"
if
-
s $d ;
$command = "$m
ysqldump
-
u
$dbuser
$pass_arg
-
d
$
dbinstance
>
$d
"
;
$command = "$m
ysqldump
-
u
$dbuser
$pass_arg
-
d
$
satdb
>
$d
"
;
if ( system(
$command
) ) {
die
"
Error:
``
$command
''
ended
with
exit
status
$?
"
;
}
}
sub dump_data {
my(
$db
,
$sql
,
$destdir
,
$destfile
) =
@_
;
my(
$sql
,
$satdb
,
$tablename
) =
@_
;
my (
$destdir
) =
"
$workdir
/
$satdb
"
;
my (
$datfile
)=
"
$tablename
.
dat
"
;
unless (-d
$destdir
) {
mkdir
$destdir
, 0755 || die
"
mkdir
$destdir:
$!
"
;
...
...
@@ -235,7 +306,7 @@ sub dump_data {
$sql
=~ s/
\
s+/ /g;
my
$cmd
=
"
echo
\
"
$sql
\"
|
$mysql
-q --batch -u
$dbuser
-p
$dbpass
$db
>
$destdir
/
$d
es
tfile
";
my
$cmd
=
"
echo
\
"
$sql
\"
|
$mysql
-q --batch -u
$dbuser
-p
$dbpass
$
lite
db
>
$destdir
/
$d
a
tfile
";
warn
"
dumping:
$cmd
\n
";
if
(
system
(
$cmd
)
)
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment