Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
bac764a0
Commit
bac764a0
authored
20 years ago
by
Steve Trevanion
Browse files
Options
Downloads
Patches
Plain Diff
inherits from ../DanioRerio.pm
parent
c0a4dc6a
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/surgery/SeqStoreConverter/vega/DanioRerio.pm
+2
-399
2 additions, 399 deletions
misc-scripts/surgery/SeqStoreConverter/vega/DanioRerio.pm
with
2 additions
and
399 deletions
misc-scripts/surgery/SeqStoreConverter/vega/DanioRerio.pm
+
2
−
399
View file @
bac764a0
use
strict
;
use
warnings
;
use
SeqStoreConverter::
BasicConverter
;
use
SeqStoreConverter::
DanioRerio
;
package
SeqStoreConverter::vega::
DanioRerio
;
use
vars
qw(@ISA)
;
@ISA
=
qw(SeqStoreConverter::BasicConverter)
;
# Set up the coordinate systems for the converted DanioRerio database.
#
# Inserts four rows into <target>.coord_system (chromosome, supercontig,
# clone and chunk, ranked 1-4), records in <target>.meta_coord which
# coordinate system(s) each feature table is stored in, and writes the
# assembly.mapping paths into <target>.meta.
#
# Takes only the invocant; returns nothing.
sub create_coord_systems {
    my ($self) = @_;

    $self->debug("DanioRerio Specific: creating chromosome, supercontig, clone "
          . "and chunk coordinate systems");

    my $target           = $self->target();
    my $dbh              = $self->dbh();
    my $default_assembly = $self->get_default_assembly();

    # One row per coordinate system, in insertion order:
    # name, version, attrib, rank.
    my @coord_system_rows = (
        [ "chromosome",  $default_assembly, "default_version",                1 ],
        [ "supercontig", $default_assembly, "default_version",                2 ],
        [ "clone",       undef,             "default_version",                3 ],
        [ "chunk",       undef,             "default_version,sequence_level", 4 ],
    );

    my @mapping_paths = (
        "chromosome:$default_assembly|chunk",
        "clone|chunk",
        "supercontig:$default_assembly|chunk",
        "chromosome:$default_assembly|chunk|clone",
        "supercontig:$default_assembly|chunk|clone",
        "chromosome:$default_assembly|chunk|supercontig",
    );

    # Feature table => names of the coordinate systems it is stored in.
    my %coord_systems_for = (
        ( map { ( $_ => [ 'supercontig', 'chromosome' ] ) }
              qw(gene transcript exon) ),
        ( map { ( $_ => ['chunk'] ) }
              qw(dna_align_feature protein_align_feature marker_feature
                 simple_feature    repeat_feature        qtl_feature
                 misc_feature      prediction_transcript prediction_exon) ),
        karyotype => ['chromosome'],
    );

    $self->debug("Building coord_system table");

    my $coord_sth = $dbh->prepare(
        "INSERT INTO $target.coord_system (name, version, attrib, rank) "
      . "VALUES (?,?,?,?)");

    # Remember the auto-generated id of every coordinate system by name.
    my %id_of_coord_system;
    for my $row (@coord_system_rows) {
        $coord_sth->execute( @{$row} );
        $id_of_coord_system{ $row->[0] } = $coord_sth->{'mysql_insertid'};
    }
    $coord_sth->finish();

    $self->debug("Building meta_coord table");

    my $meta_coord_sth =
      $dbh->prepare("INSERT INTO $target.meta_coord VALUES (?, ?)");
    for my $feature_type ( keys %coord_systems_for ) {
        $meta_coord_sth->execute( $feature_type, $id_of_coord_system{$_} )
          for @{ $coord_systems_for{$feature_type} };
    }
    $meta_coord_sth->finish();

    $self->debug("Adding assembly.mapping entries to meta table");

    my $mapping_sth = $dbh->prepare(
        "INSERT INTO $target.meta(meta_key, meta_value) "
      . "VALUES ('assembly.mapping', ?)");
    $mapping_sth->execute($_) for @mapping_paths;
    $mapping_sth->finish();

    return;
}
# Populate <target>.seq_region for the DanioRerio conversion.
#
# Four kinds of seq_region are created, in this order:
#   * chunk       - one per row of <source>.contig, keeping contig_id as the
#                   seq_region_id and named 'chunk' . contig_id;
#   * clone       - one per contig whose name matches neither 'ctg%' nor
#                   'NA%';
#   * chromosome  - one per <source>.chromosome row whose name is at most two
#                   characters long;
#   * supercontig - one per distinct superctg_name in <source>.assembly, with
#                   length MAX(chr_end) - MIN(chr_start) + 1.
#
# Old/new id pairs are written to <target>.tmp_cln_map, tmp_chr_map and
# tmp_superctg_map. None of those is created here, so they must already
# exist when this sub runs. The temporary table <target>.tmp_toplevel_map is
# created here: it receives every chromosome, plus the first supercontig seen
# for any source chromosome_id that has no entry yet.
#
# Takes only the invocant. The value of the final finish() call is what the
# sub returns, as in the original.
sub create_seq_regions {
    my $self = shift;

    my $source = $self->source();
    my $target = $self->target();
    my $dbh    = $self->dbh();

    #
    # Turn all of the contents of the contig table into 'chunks' and
    # give them arbitrary names like chunk1, chunk2. Keep old internal
    # ids for convenience.
    #
    $self->debug("DanioRerio Specific: creating chunk seq_regions");
    my $sth = $dbh->prepare(
        "INSERT INTO $target.seq_region (seq_region_id, name, coord_system_id, "
      . "length) "
      . "SELECT ctg.contig_id, concat('chunk', ctg.contig_id), "
      . "cs.coord_system_id, ctg.length "
      . "FROM $source.contig ctg, $target.coord_system cs "
      . "WHERE cs.name = 'chunk'");
    $sth->execute();
    $sth->finish();

    # Shared insert for the clone, chromosome and supercontig passes below;
    # the new seq_region_id is read back from mysql_insertid after each row.
    my $insert_sth = $dbh->prepare(
        "INSERT INTO $target.seq_region (name, coord_system_id, length) "
      . "VALUES (?,?,?)");

    my $tmp_chr_insert_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

    my $tmp_supercontig_insert_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?,?)");

    my $tmp_clone_insert_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?,?)");

    #
    # create a temporary table to hold the ids of all 'toplevel'
    # seq_regions. Keep the old chromosome_id, and the new seq_region_id
    #
    $dbh->do(
        "CREATE TEMPORARY TABLE $target.tmp_toplevel_map "
      . "(old_id INT, new_id INT, INDEX new_idx(new_id), INDEX old_idx(old_id))");

    my $tmp_toplevel_insert_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_toplevel_map (old_id, new_id) VALUES (?,?)");

    #
    # Turn real clones into clones
    #
    $self->debug("DanioRerio Specific: creating clone seq_regions");
    my $select_sth = $dbh->prepare(
        "SELECT ctg.contig_id, ctg.name, ctg.length "
      . "FROM $source.contig ctg "
      . "WHERE ctg.name not like 'ctg%' and ctg.name not like 'NA%'");

    my $cs_id = $self->get_coord_system_id('clone');

    $select_sth->execute();

    my ($old_id, $name, $length);
    $select_sth->bind_columns(\$old_id, \$name, \$length);

    while ($select_sth->fetch()) {
        #insert into seq_region table
        $insert_sth->execute($name, $cs_id, $length);
        #copy old/new mapping into temporary table
        $tmp_clone_insert_sth->execute($old_id, $insert_sth->{'mysql_insertid'});
    }
    $select_sth->finish();

    #
    # Turn real chromosomes into chromosomes
    #
    $self->debug("DanioRerio Specific: creating chromosome seq_regions");
    $select_sth = $dbh->prepare(
        "SELECT chr.chromosome_id, chr.name, chr.length "
      . "FROM $source.chromosome chr "
      . "WHERE length(chr.name) <= 2");

    $cs_id = $self->get_coord_system_id('chromosome');

    $select_sth->execute();
    $select_sth->bind_columns(\$old_id, \$name, \$length);

    # Source chromosome_ids that already have a tmp_toplevel_map entry.
    my %chr_id_added;

    while ($select_sth->fetch()) {
        #insert into seq_region table
        $insert_sth->execute($name, $cs_id, $length);
        #copy old/new mapping into temporary table
        my $new_id = $insert_sth->{'mysql_insertid'};
        $tmp_chr_insert_sth->execute($old_id, $new_id);
        $tmp_toplevel_insert_sth->execute($old_id, $new_id);
        $chr_id_added{$old_id} = 1;
    }
    $select_sth->finish();

    #
    # Turn supercontigs into supercontigs
    #
    $self->debug("DanioRerio Specific: creating supercontig seq_regions");

    # Only the assembly table is needed here. The original also listed
    # $target.coord_system with no join condition, which multiplied every
    # assembly row by the number of coordinate systems before grouping
    # without changing the grouped result.
    $select_sth = $dbh->prepare(
        "SELECT a.chromosome_id, a.superctg_name, "
      . "MAX(a.chr_end) - MIN(a.chr_start) + 1 "
      . "FROM $source.assembly a "
      . "GROUP BY a.superctg_name");

    $select_sth->execute();
    $select_sth->bind_columns(\$old_id, \$name, \$length);

    $cs_id = $self->get_coord_system_id('supercontig');

    while ($select_sth->fetch()) {
        #insert into seq_region table
        $insert_sth->execute($name, $cs_id, $length);
        #copy old/new mapping into temporary table
        my $new_id = $insert_sth->{'mysql_insertid'};
        $tmp_supercontig_insert_sth->execute($name, $new_id);

        # A supercontig only becomes toplevel when its source chromosome_id
        # has no toplevel entry yet (no real chromosome, and no earlier
        # supercontig, claimed it).
        if (!$chr_id_added{$old_id}) {
            $chr_id_added{$old_id} = 1;
            $tmp_toplevel_insert_sth->execute($old_id, $new_id);
        }
    }
    $select_sth->finish();

    $tmp_chr_insert_sth->finish();
    $tmp_supercontig_insert_sth->finish();
    $tmp_clone_insert_sth->finish();
    $tmp_toplevel_insert_sth->finish();
    $insert_sth->finish();
}
# Build the assembly table by calling one helper method per assembled
# coordinate system, in the order chromosome, supercontig, clone. Each of
# the three is assembled from chunks.
#
# Takes only the invocant; returns nothing.
sub create_assembly {
    my ($self) = @_;

    for my $assembled (qw(chromosome supercontig clone)) {
        my $builder = "assembly_contig_$assembled";
        $self->$builder();
    }

    return;
}
# Add the chunk/clone rows to <target>.assembly.
#
# For every (old_id, new_id) pair in <target>.tmp_cln_map one assembly row is
# inserted with new_id as the assembled seq_region and old_id as the
# component. Both ranges run from 1 to the seq_region's length, with
# orientation 1.
#
# Takes only the invocant. The result of the do() call is the sub's return
# value, as in the original.
sub assembly_contig_clone {
    my $self = shift;

    $self->debug("DanioRerio Specific: building assembly table - chunk/clone");

    #this is easy, there is simply one entire chunk for a given clone

    # Only the target database is touched, so the source name is not fetched.
    my $target = $self->target();
    my $dbh    = $self->dbh();

    $dbh->do(
        "INSERT INTO $target.assembly (asm_seq_region_id, cmp_seq_region_id, "
      . "asm_start, asm_end, cmp_start, cmp_end, ori) "
      . "SELECT tcm.new_id, tcm.old_id, 1, sr.length, 1, sr.length, 1 "
      . "FROM $target.tmp_cln_map tcm, $target.seq_region sr "
      . "WHERE sr.seq_region_id = tcm.new_id");
}
# we need to override the transfer of the genes since danio genes can be on
# supercontigs and on chromosomes
#
# Copies the gene, transcript, exon and translation tables from the source
# database into the target database with four INSERT ... SELECT statements.
#
# Genes, transcripts and exons are placed on the seq_region that
# <target>.tmp_toplevel_map gives for the source assembly row's
# chromosome_id (toplev.old_id -> toplev.new_id). Their start/end are
# computed from the exon's contig coordinates and the assembly row:
#   contig_ori = 1 : chr_start + (exon contig coordinate - a.contig_start)
#   otherwise      : chr_start + (a.contig_end - exon contig coordinate),
#                    with the exon's start and end swapping roles
# MIN/MAX over the group then give the extent of the gene/transcript/exon.
#
# NOTE(review): the INSERTs give no column list, so the SELECT column order
# has to match the target table definitions -- confirm against the schema.
#
# Takes only the invocant; returns nothing.
sub transfer_genes {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  #
  # Transfer the gene table
  #
  $self->debug("DanioRerio Specific: Building gene table");

  # one row per gene (GROUP BY g.gene_id), reached through its transcripts
  # and their exons; the seq_region comes from tmp_toplevel_map
  $dbh->do
    ("INSERT INTO $target.gene " .
     "SELECT g.gene_id, g.type, g.analysis_id, toplev.new_id, " .
     "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_end ))) as start, " .
     "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_start))) as end, " .
     "a.contig_ori*e.contig_strand as strand, " .
     "g.display_xref_id " .
     "FROM $source.transcript t, $source.exon_transcript et, " .
     "$source.exon e, $source.assembly a, $source.gene g, " .
     "$target.tmp_toplevel_map toplev " .
     "WHERE t.transcript_id = et.transcript_id " .
     "AND et.exon_id = e.exon_id " .
     "AND e.contig_id = a.contig_id " .
     "AND g.gene_id = t.gene_id " .
     "AND a.chromosome_id = toplev.old_id " .
     "GROUP BY g.gene_id");

  #
  # Transfer the transcript table
  #
  $self->debug("DanioRerio Specific: Building transcript table");

  # one row per transcript (GROUP BY t.transcript_id); same coordinate
  # conversion as for genes
  $dbh->do
    ("INSERT INTO $target.transcript " .
     "SELECT t.transcript_id, t.gene_id, toplev.new_id, " .
     "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_end ))) as start, " .
     "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_start))) as end, " .
     "a.contig_ori*e.contig_strand as strand, " .
     "t.display_xref_id " .
     "FROM $source.transcript t, $source.exon_transcript et, " .
     "$source.exon e, $source.assembly a, " .
     "$target.tmp_toplevel_map toplev " .
     "WHERE t.transcript_id = et.transcript_id " .
     "AND et.exon_id = e.exon_id " .
     "AND e.contig_id = a.contig_id " .
     "AND a.chromosome_id = toplev.old_id " .
     "GROUP BY t.transcript_id");

  #
  # Transfer the exon table
  #
  $self->debug("DanioRerio Specific: Building exon table");

  # one row per exon (GROUP BY e.exon_id); the joins to transcript and gene
  # mean only exons that belong to a transcript with a gene are copied
  $dbh->do
    ("INSERT INTO $target.exon " .
     "SELECT e.exon_id, toplev.new_id, " .
     "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_end ))) as start, " .
     "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
     "(a.chr_start+a.contig_end-e.contig_start))) as end, " .
     "a.contig_ori*e.contig_strand as strand, " .
     "e.phase, e.end_phase " .
     "FROM $source.transcript t, $source.exon_transcript et, " .
     "$source.exon e, $source.assembly a, $source.gene g, " .
     "$target.tmp_toplevel_map toplev " .
     "WHERE t.transcript_id = et.transcript_id " .
     "AND et.exon_id = e.exon_id " .
     "AND e.contig_id = a.contig_id " .
     "AND g.gene_id = t.gene_id " .
     "AND a.chromosome_id = toplev.old_id " .
     "GROUP BY e.exon_id");

  #
  # Transfer translation table
  #
  $self->debug("Building translation table");

  # translations carry no coordinates of their own; each one is copied
  # together with the id of the transcript that references it
  $dbh->do
    ("INSERT INTO $target.translation " .
     "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " .
     "tl.start_exon_id, tl.seq_end, tl.end_exon_id " .
     "FROM $source.transcript ts, $source.translation tl " .
     "WHERE ts.translation_id = tl.translation_id");

  return;
}
# Flag every seq_region listed in <target>.tmp_toplevel_map with the
# attribute whose id is returned by add_attrib_code().
#
# Existing <target>.seq_region_attrib rows with that attrib_type_id are
# deleted first. Then one row (new_id, attrib_type_id, 1) is inserted per
# tmp_toplevel_map entry.
#
# Takes only the invocant. The value of the final finish() call is what the
# sub returns, as in the original.
sub set_top_level {
    my ($self) = @_;

    my $target         = $self->target();
    my $dbh            = $self->dbh();
    my $attrib_type_id = $self->add_attrib_code();

    $self->debug(
        "DanioRerio Specific: Setting toplevel attributes of seq_regions");

    # Clear out any rows left from a previous run.
    my $delete_sql = "DELETE FROM $target.seq_region_attrib "
      . "WHERE attrib_type_id = ?";
    my $delete_sth = $dbh->prepare($delete_sql);
    $delete_sth->execute($attrib_type_id);
    $delete_sth->finish();

    # The attribute id is interpolated into the SELECT list rather than
    # bound, exactly as before.
    my $insert_sql = "INSERT INTO $target.seq_region_attrib "
      . "(seq_region_id, attrib_type_id, value) "
      . "SELECT toplev.new_id, $attrib_type_id, 1 "
      . "FROM $target.tmp_toplevel_map toplev";
    my $insert_sth = $dbh->prepare($insert_sql);
    $insert_sth->execute();
    $insert_sth->finish();
}
# Inherit from SeqStoreConverter::DanioRerio.
# NOTE(review): @ISA is also assigned near the top of this file (to
# SeqStoreConverter::BasicConverter). This later assignment is the one in
# effect once the file has finished loading -- confirm the earlier one is a
# leftover.
@ISA = qw(SeqStoreConverter::DanioRerio);
sub
copy_other_tables
{
my
$self
=
shift
;
...
...
@@ -505,8 +112,4 @@ sub update_clone_info {
return
;
}
1
;
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment