Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
e40c7ca0
Commit
e40c7ca0
authored
20 years ago
by
Arne Stabenau
Browse files
Options
Downloads
Patches
Plain Diff
essentially rewritten, should be useless soon thanks to Vivek and Craig
parent
ee34ddf3
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sql/transfer_misc_affy.pl
+132
-62
132 additions, 62 deletions
sql/transfer_misc_affy.pl
with
132 additions
and
62 deletions
sql/transfer_misc_affy.pl
+
132
−
62
View file @
e40c7ca0
...
...
@@ -10,13 +10,21 @@ my $affy_array; #hash containing the relation between misc_set_id => affy_array_
my
$probe_feature
;
#hash containing the information relevant to the affy_feature table
my
$affy_probe
;
#hash containing the information relevant to the affy_probe table: misc_set_id -> [probeset,probename]
my
$affy_probe_id
=
1
;
#first affy_probe_id in the affy_probe table
# unbeknown to me there are actually probesets that contain the same probe twice.
# for the conversion we have to take them out; it will work better when the features
# and probes are generated from the fasta files directly
my
%kill_probeset
=
(
'
892_at
'
=>
1
);
{
my
(
$chost
,
$cuser
,
$cpass
,
$cport
,
$cdbname
);
#ensembl core db
GetOptions
('
c
host=s
'
=>
\
$chost
,
'
c
user=s
'
=>
\
$cuser
,
'
c
pass=s
'
=>
\
$cpass
,
'
c
port=i
'
=>
\
$cport
,
'
c
dbname=s
'
=>
\
$cdbname
,
GetOptions
('
host=s
'
=>
\
$chost
,
'
user=s
'
=>
\
$cuser
,
'
pass=s
'
=>
\
$cpass
,
'
port=i
'
=>
\
$cport
,
'
dbname=s
'
=>
\
$cdbname
,
'
tmpdir=s
'
=>
\
$tmp_dir
);
#by default, connect to the staging server at ecs2:3364
...
...
@@ -62,68 +70,130 @@ sub populate_affy_array{
}
sub
populate_probe_info
{
my
$dbCore
=
shift
;
my
$dbCore
=
shift
;
my
(
$seq_region_id
,
$seq_region_start
,
$seq_region_end
,
$seq_region_strand
,
$misc_set_id
,
$complete_probename
,
$mismatch
);
my
(
$affy_name
,
$affy_probeset
,
$probename
);
my
(
$seq_region_id
,
$seq_region_start
,
$seq_region_end
,
$seq_region_strand
,
$misc_set_id
,
$value
,
$mismatch
);
my
(
$affy_name
,
$affy_probeset
,
$probename
);
#attributes in the value field for an attrib_type_id = 5
my
$previous_seq_region_id
=
0
;
my
$previous_seq_region_start
=
0
;
my
$old_affy_probe
;
my
$probe_set
;
print
STDERR
"
Going to get affy information....
\n
";
my
$sth
=
$dbCore
->
dbc
()
->
prepare
(
qq{SELECT STRAIGHT_JOIN seq_region_id, seq_region_start, seq_region_end, seq_region_strand, mff.misc_set_id, ma1.value, (IF (ma2.value = 'Mismatch',1,0)) as mismatch
FROM misc_feature mf, misc_attrib ma1, misc_attrib ma2, misc_feature_misc_set mff, attrib_type at1, attrib_type at2, misc_set ms
WHERE ma2.misc_feature_id = mf.misc_feature_id
AND ma2.attrib_type_id = at2.attrib_type_id
AND at2.code = 'matchStatus'
AND ma1.attrib_type_id = at1.attrib_type_id
AND at1.code = 'probeName'
AND ma1.misc_feature_id = mf.misc_feature_id
AND mf.misc_feature_id = mff.misc_feature_id
AND ms.misc_set_id = mff.misc_set_id
AND ms.code <> 'All_Affy'
ORDER BY seq_region_id,seq_region_start
}
);
my
$previous_seq_region_id
=
-
1
;
my
$previous_seq_region_start
=
-
1
;
my
$old_affy_probe
;
my
$probe_set
;
print
STDERR
"
Going to get affy information....
\n
";
my
$sql
=
qq{
SELECT STRAIGHT_JOIN seq_region_id, seq_region_start, seq_region_end,
seq_region_strand, mff.misc_set_id, ma1.value,
(IF (ma2.value = 'Mismatch',1,0)) as mismatch
FROM misc_feature mf, misc_attrib ma1, misc_attrib ma2,
misc_feature_misc_set mff, attrib_type at1,
attrib_type at2, misc_set ms
WHERE ma2.misc_feature_id = mf.misc_feature_id
AND ma2.attrib_type_id = at2.attrib_type_id
AND at2.code = 'matchStatus'
AND ma1.attrib_type_id = at1.attrib_type_id
AND at1.code = 'probeName'
AND ma1.misc_feature_id = mf.misc_feature_id
AND mf.misc_feature_id = mff.misc_feature_id
AND ms.misc_set_id = mff.misc_set_id
AND ms.code <> 'All_Affy'
ORDER BY seq_region_id,seq_region_start }
;
print
STDERR
"
Ready to create affy files
\n
";
open
FEATURE
,
"
>
$tmp_dir
/affy_feature_$$
\
.txt
";
open
PROBE
,
"
>
$tmp_dir
/affy_probe_$$
\
.txt
";
my
%stored_probes
;
my
$current_probe_id
=
1
;
my
%merge_cache
;
my
$merge_key
;
# merging of probes is only allowed for mismatch = 0
for
my
$mismatch_process
(
0
..
1
)
{
my
$sth
=
$dbCore
->
dbc
()
->
prepare
(
$sql
);
$sth
->
{
mysql_use_result
}
=
1
;
$sth
->
execute
();
$sth
->
bind_columns
(
\
$seq_region_id
,
\
$seq_region_start
,
\
$seq_region_end
,
\
$seq_region_strand
,
\
$misc_set_id
,
\
$value
,
\
$mismatch
);
print
STDERR
"
Ready to create affy files
\n
";
open
FEATURE
,
"
>
$tmp_dir
/affy_feature_$$
\
.txt
";
open
PROBE
,
"
>
$tmp_dir
/affy_probe_$$
\
.txt
";
$sth
->
bind_columns
(
\
$seq_region_id
,
\
$seq_region_start
,
\
$seq_region_end
,
\
$seq_region_strand
,
\
$misc_set_id
,
\
$complete_probename
,
\
$mismatch
);
my
$prev_seq_region_id
=
-
1
;
my
$prev_start
=
-
1
;
while
(
$sth
->
fetch
()){
#we have a new probe, add the previous one to the database, and flush the structures
unless
(((
$previous_seq_region_id
==
$seq_region_id
)
or
(
$previous_seq_region_id
==
0
))
and
((
$previous_seq_region_start
==
$seq_region_start
)
or
(
$previous_seq_region_start
==
0
))){
foreach
my
$key
(
keys
%
{
$affy_probe
}){
if
(
!
exists
$probe_set
->
{
$affy_probeset
.
"
:
"
.
$affy_probe
->
{
$key
}}){
$probe_set
->
{
$affy_probeset
.
"
:
"
.
$affy_probe
->
{
$key
}}
=
$affy_probe_id
;
print
PROBE
join
("
\t
",
$affy_probe_id
,
$affy_array
->
{
$key
},
$affy_probeset
,
$affy_probe
->
{
$key
}),"
\n
";
}
$old_affy_probe
=
$probe_set
->
{
$affy_probeset
.
"
:
"
.
$affy_probe
->
{
$key
}};
}
#insert all the affy_probe values in the file
print
FEATURE
join
("
\t
",
$probe_feature
->
{'
seq_region_id
'},
$probe_feature
->
{'
seq_region_start
'},
$probe_feature
->
{'
seq_region_end
'},
$probe_feature
->
{'
seq_region_strand
'},
$probe_feature
->
{'
mismatches
'},
$old_affy_probe
),"
\n
";
$affy_probeset
=
'';
$affy_probe_id
++
;
$affy_probe
=
();
$probe_feature
=
();
next
unless
(
$mismatch
==
$mismatch_process
);
# flush the merge cache regularly
if
(
$prev_start
!=
$seq_region_start
||
$prev_seq_region_id
!=
$seq_region_id
)
{
%merge_cache
=
();
$prev_start
=
$seq_region_start
;
$prev_seq_region_id
=
$seq_region_id
;
}
my
(
$affy_name
,
$affy_probeset
,
$probename
)
=
split
/:/
,
$complete_probename
,
3
;
if
(
$kill_probeset
{
$affy_probeset
}
)
{
next
;
}
# first check whether we have to store probe information
my
$probe_id
=
$stored_probes
{
$complete_probename
};
if
(
!
$mismatch
)
{
$merge_key
=
join
(
"
-
",
$seq_region_id
,
$seq_region_start
,
$seq_region_end
,
$seq_region_strand
,
$mismatch
,
$affy_probeset
);
}
if
(
!
defined
$probe_id
)
{
# probe information needs to be stored, but new probe_id or existing one?
$probe_id
=
$merge_cache
{
$merge_key
};
if
((
!
defined
$probe_id
)
||
$mismatch
)
{
$probe_id
=
$current_probe_id
++
;
}
$previous_seq_region_id
=
$seq_region_id
;
$previous_seq_region_start
=
$seq_region_start
;
(
$affy_name
,
$affy_probeset
,
$probename
)
=
split
/:/
,
$value
,
3
;
$affy_probe
->
{
$misc_set_id
}
=
$probename
;
$probe_feature
->
{'
seq_region_id
'}
=
$seq_region_id
;
$probe_feature
->
{'
seq_region_start
'}
=
$seq_region_start
;
$probe_feature
->
{'
seq_region_end
'}
=
$seq_region_end
;
$probe_feature
->
{'
seq_region_strand
'}
=
$seq_region_strand
;
$probe_feature
->
{'
mismatches
'}
=
$mismatch
;
}
print
PROBE
join
(
"
\t
",
$probe_id
,
$affy_array
->
{
$misc_set_id
},
$affy_probeset
,
$probename
),"
\n
";
$stored_probes
{
$complete_probename
}
=
$probe_id
;
}
# at this point the probe_id is correct; it might already be clear that the
# feature doesn't need storing (there is already a merge cache entry for
# this position).
# do we want to store the feature?
# if it's already stored with that probe_id, it's in the
# merge_cache and no additional feature is needed
if
(
$mismatch
)
{
$merge_key
=
join
(
"
-
",
$probe_id
,
$seq_region_id
,
$seq_region_start
,
$seq_region_end
,
$seq_region_strand
);
}
if
(
exists
$merge_cache
{
$merge_key
}
)
{
# this one is already stored
}
else
{
$merge_cache
{
$merge_key
}
=
$probe_id
;
print
FEATURE
join
("
\t
",
$seq_region_id
,
$seq_region_start
,
$seq_region_end
,
$seq_region_strand
,
$mismatch
,
$probe_id
),"
\n
";
}
}
$sth
->
finish
();
close
FEATURE
;
close
PROBE
;
#and finally import the information
print
STDERR
"
Loading new affy information
\n
";
load
(
$dbCore
,"
$tmp_dir
/affy_feature_$$
\
.txt
",
qw(affy_feature seq_region_id seq_region_start seq_region_end seq_region_strand mismatches affy_probe_id)
);
load
(
$dbCore
,"
$tmp_dir
/affy_probe_$$
\
.txt
",
qw(affy_probe affy_probe_id affy_array_id probeset name)
);
}
close
FEATURE
;
close
PROBE
;
#and finally import the information
print
STDERR
"
Loading new affy information
\n
";
load
(
$dbCore
,"
$tmp_dir
/affy_feature_$$
\
.txt
",
qw(affy_feature seq_region_id seq_region_start seq_region_end seq_region_strand mismatches affy_probe_id)
);
load
(
$dbCore
,"
$tmp_dir
/affy_probe_$$
\
.txt
",
qw(affy_probe affy_probe_id affy_array_id probeset name)
);
}
...
...
@@ -135,7 +205,7 @@ sub load{
my
$cols
=
join
(
"
,
",
@colnames
);
my
$sql
=
qq{
LOAD DATA
LOCAL
INFILE '$file'
LOAD DATA INFILE '$file'
INTO TABLE $tablename ($cols)
}
;
$dbCore
->
dbc
()
->
do
(
$sql
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment