Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ensembl-web
ensembl-client
Commits
6eb971c5
Unverified
Commit
6eb971c5
authored
Feb 24, 2021
by
Andrey Azov
Committed by
GitHub
Feb 24, 2021
Browse files
Download sequences in plain text and transform them to FASTA in web worker (#450)
parent
bccc4446
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
313 additions
and
94 deletions
+313
-94
src/ensembl/package-lock.json
src/ensembl/package-lock.json
+37
-0
src/ensembl/package.json
src/ensembl/package.json
+3
-1
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchForProtein.ts
...nstant-download/instant-download-fetch/fetchForProtein.ts
+52
-32
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchForTranscript.ts
...ant-download/instant-download-fetch/fetchForTranscript.ts
+66
-46
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchSequenceChecksums.ts
...download/instant-download-fetch/fetchSequenceChecksums.ts
+55
-13
src/ensembl/src/shared/helpers/formatters/fastaFormatter.test.ts
...embl/src/shared/helpers/formatters/fastaFormatter.test.ts
+29
-0
src/ensembl/src/shared/helpers/formatters/fastaFormatter.ts
src/ensembl/src/shared/helpers/formatters/fastaFormatter.ts
+20
-0
src/ensembl/src/shared/workers/sequenceFetcher.worker.ts
src/ensembl/src/shared/workers/sequenceFetcher.worker.ts
+47
-0
src/ensembl/webpack/environments/webpack.common.js
src/ensembl/webpack/environments/webpack.common.js
+4
-2
No files found.
src/ensembl/package-lock.json
View file @
6eb971c5
...
...
@@ -8122,6 +8122,12 @@
"delayed-stream": "~1.0.0"
}
},
"comlink": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/comlink/-/comlink-4.3.0.tgz",
"integrity": "sha512-mu4KKKNuW8TvkfpW/H88HBPeILubBS6T94BdD1VWBXNXfiyqVtwUCVNO1GeNOBTsIswzsMjWlycYr+77F5b84g==",
"dev": true
},
"comma-separated-tokens": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-1.0.8.tgz",
...
...
@@ -28511,6 +28517,37 @@
"errno": "~0.1.7"
}
},
"worker-plugin": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/worker-plugin/-/worker-plugin-5.0.0.tgz",
"integrity": "sha512-AXMUstURCxDD6yGam2r4E34aJg6kW85IiaeX72hi+I1cxyaMUtrvVY6sbfpGKAj5e7f68Acl62BjQF5aOOx2IQ==",
"dev": true,
"requires": {
"loader-utils": "^1.1.0"
},
"dependencies": {
"json5": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz",
"integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==",
"dev": true,
"requires": {
"minimist": "^1.2.0"
}
},
"loader-utils": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.4.0.tgz",
"integrity": "sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA==",
"dev": true,
"requires": {
"big.js": "^5.2.2",
"emojis-list": "^3.0.0",
"json5": "^1.0.1"
}
}
}
},
"worker-rpc": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/worker-rpc/-/worker-rpc-0.1.1.tgz",
src/ensembl/package.json
View file @
6eb971c5
...
...
@@ -117,6 +117,7 @@
"babel-loader"
:
"8.1.0"
,
"babel-plugin-react-remove-properties"
:
"0.3.0"
,
"brotli-webpack-plugin"
:
"1.1.0"
,
"comlink"
:
"4.3.0"
,
"compression-webpack-plugin"
:
"5.0.2"
,
"connect-history-api-fallback"
:
"1.6.0"
,
"copy-webpack-plugin"
:
"6.1.0"
,
...
...
@@ -170,7 +171,8 @@
"webpack-cli"
:
"3.3.12"
,
"webpack-dev-server"
:
"3.11.0"
,
"webpack-merge"
:
"5.1.4"
,
"workbox-webpack-plugin"
:
"5.1.4"
"workbox-webpack-plugin"
:
"5.1.4"
,
"worker-plugin"
:
"5.0.0"
},
"browserslist"
:
[
"> 1% and last 2 versions"
,
...
...
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchForProtein.ts
View file @
6eb971c5
...
...
@@ -14,6 +14,8 @@
* limitations under the License.
*/
import
{
wrap
}
from
'
comlink
'
;
import
downloadAsFile
from
'
src/shared/helpers/downloadAsFile
'
;
import
{
ProteinOptions
,
...
...
@@ -21,10 +23,15 @@ import {
proteinOptionsOrder
}
from
'
src/shared/components/instant-download/instant-download-protein/InstantDownloadProtein
'
;
import
{
fetchTranscript
Checksums
,
Transcript
Checksums
fetchTranscript
SequenceMetadata
,
Transcript
SequenceMetadata
}
from
'
./fetchSequenceChecksums
'
;
import
{
WorkerApi
,
SingleSequenceFetchParams
}
from
'
src/shared/workers/sequenceFetcher.worker
'
;
type
FetchPayload
=
{
genomeId
:
string
;
transcriptId
:
string
;
...
...
@@ -33,47 +40,60 @@ type FetchPayload = {
export
const
fetchForProtein
=
async
(
payload
:
FetchPayload
)
=>
{
const
{
genomeId
,
transcriptId
,
options
}
=
payload
;
const
productGeneratingContext
=
await
fetchTranscript
Checksums
({
const
transcriptSequenceData
=
await
fetchTranscript
SequenceMetadata
({
genomeId
,
transcriptId
});
const
urls
=
buildUrlsForProtein
(
productGeneratingContext
,
options
);
const
sequencePromises
=
urls
.
map
((
url
)
=>
fetch
(
url
).
then
((
response
)
=>
response
.
text
())
);
const
sequenceDownloadParams
=
prepareDownloadParameters
({
transcriptSequenceData
,
options
});
const
worker
=
new
Worker
(
'
src/shared/workers/sequenceFetcher.worker
'
,
{
type
:
'
module
'
});
const
sequences
=
await
Promise
.
all
(
sequencePromises
);
const
combinedFasta
=
sequences
.
join
(
'
\n\n
'
);
const
service
=
wrap
<
WorkerApi
>
(
worker
);
downloadAsFile
(
combinedFasta
,
`
${
transcriptId
}
.fasta`
,
{
const
sequences
=
await
service
.
downloadSequences
(
sequenceDownloadParams
);
worker
.
terminate
();
downloadAsFile
(
sequences
,
`
${
transcriptId
}
.fasta`
,
{
type
:
'
text/x-fasta
'
});
};
const
buildUrlsForProtein
=
(
productGeneratingContext
:
TranscriptChecksums
,
options
:
ProteinOptions
)
=>
{
return
options
?
proteinOptionsOrder
.
filter
((
option
)
=>
options
[
option
])
.
map
((
option
)
=>
buildFetchUrl
(
productGeneratingContext
,
option
))
:
[];
type
PrepareDownloadParametersParams
=
{
transcriptSequenceData
:
TranscriptSequenceMetadata
;
options
:
ProteinOptions
;
};
const
buildFetchUrl
=
(
productGeneratingContext
:
TranscriptChecksums
,
sequenceType
:
ProteinOption
)
=>
{
const
sequenceTypeToContextType
:
Record
<
ProteinOption
,
string
>
=
{
proteinSequence
:
'
product
'
,
cds
:
'
cds
'
};
const
contextType
=
sequenceTypeToContextType
[
sequenceType
]
as
keyof
TranscriptChecksums
;
const
checksum
=
productGeneratingContext
[
contextType
]?.
sequence_checksum
;
const
prepareDownloadParameters
=
(
params
:
PrepareDownloadParametersParams
)
=>
{
const
{
transcriptSequenceData
}
=
params
;
return
proteinOptionsOrder
.
filter
((
option
)
=>
params
.
options
[
option
])
.
map
((
option
)
=>
labelTypeToSequenceType
[
option
])
// 'protein', 'cds'
.
map
((
option
)
=>
{
const
dataForSingleSequence
=
transcriptSequenceData
[
option
];
if
(
!
dataForSingleSequence
)
{
// shouldn't happen; but to keep typescript happy
return
null
;
}
return
{
label
:
dataForSingleSequence
.
label
,
url
:
`/api/refget/sequence/
${
dataForSingleSequence
.
checksum
}
?accept=text/plain`
};
})
.
filter
(
Boolean
)
as
SingleSequenceFetchParams
[];
};
return
`/api/refget/sequence/
${
checksum
}
?accept=text/x-fasta`
;
// map of field names received from component to field names returned when fetching checksums
const
labelTypeToSequenceType
:
Record
<
ProteinOption
,
keyof
TranscriptSequenceMetadata
>
=
{
proteinSequence
:
'
protein
'
,
cds
:
'
cds
'
};
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchForTranscript.ts
View file @
6eb971c5
...
...
@@ -14,6 +14,8 @@
* limitations under the License.
*/
import
{
wrap
}
from
'
comlink
'
;
import
downloadAsFile
from
'
src/shared/helpers/downloadAsFile
'
;
import
{
...
...
@@ -22,10 +24,15 @@ import {
transcriptOptionsOrder
}
from
'
src/shared/components/instant-download/instant-download-transcript/InstantDownloadTranscript
'
;
import
{
fetchTranscript
Checksums
,
Transcript
Checksums
fetchTranscript
SequenceMetadata
,
Transcript
SequenceMetadata
}
from
'
./fetchSequenceChecksums
'
;
import
{
WorkerApi
,
SingleSequenceFetchParams
}
from
'
src/shared/workers/sequenceFetcher.worker
'
;
type
Options
=
{
transcript
:
Partial
<
TranscriptOptions
>
;
gene
:
{
...
...
@@ -47,64 +54,77 @@ export const fetchForTranscript = async (payload: FetchPayload) => {
transcriptId
,
options
:
{
transcript
:
transcriptOptions
,
gene
:
geneOptions
}
}
=
payload
;
const
checksums
=
await
fetchTranscript
Checksums
({
const
transcriptSequenceData
=
await
fetchTranscript
SequenceMetadata
({
genomeId
,
transcriptId
});
const
urls
=
buildUrlsForTranscript
({
geneId
,
checksums
},
transcriptOptions
);
const
sequenceDownloadParams
=
prepareDownloadParameters
({
transcriptId
,
transcriptSequenceData
,
options
:
transcriptOptions
});
if
(
geneOptions
.
genomicSequence
)
{
urls
.
push
(
buildFetchUrl
({
geneId
},
'
g
enomicSequence
'
));
sequenceDownloadParams
.
push
(
getG
enomicSequence
Data
(
geneId
));
}
const
sequencePromises
=
urls
.
map
((
url
)
=>
fetch
(
url
).
then
((
response
)
=>
response
.
text
())
);
const
sequences
=
await
Promise
.
all
(
sequencePromises
);
const
combinedFasta
=
sequences
.
join
(
'
\n\n
'
);
const
worker
=
new
Worker
(
'
src/shared/workers/sequenceFetcher.worker
'
,
{
type
:
'
module
'
});
const
service
=
wrap
<
WorkerApi
>
(
worker
);
const
sequences
=
await
service
.
downloadSequences
(
sequenceDownloadParams
);
downloadAsFile
(
combinedFasta
,
`
${
transcriptId
}
.fasta`
,
{
worker
.
terminate
();
downloadAsFile
(
sequences
,
`
${
transcriptId
}
.fasta`
,
{
type
:
'
text/x-fasta
'
});
};
const
buildUrlsForTranscript
=
(
data
:
{
geneId
:
string
;
checksums
:
TranscriptChecksums
;
},
options
:
Partial
<
TranscriptOptions
>
)
=>
{
return
options
?
transcriptOptionsOrder
.
filter
((
option
)
=>
options
[
option
])
.
map
((
option
)
=>
buildFetchUrl
(
data
,
option
))
:
[];
type
PrepareDownloadParametersParams
=
{
transcriptId
:
string
;
transcriptSequenceData
:
TranscriptSequenceMetadata
;
options
:
Partial
<
TranscriptOptions
>
;
};
const
buildFetchUrl
=
(
data
:
{
geneId
:
string
;
checksums
?:
TranscriptChecksums
;
},
sequenceType
:
TranscriptOption
)
=>
{
const
sequenceTypeToContextType
:
Record
<
TranscriptOption
,
string
>
=
{
genomicSequence
:
'
genomic
'
,
proteinSequence
:
'
product
'
,
cdna
:
'
cdna
'
,
cds
:
'
cds
'
};
// map of field names received from component to field names returned when fetching checksums
const
labelTypeToSequenceType
:
Record
<
TranscriptOption
,
keyof
TranscriptSequenceMetadata
|
'
genomic
'
>
=
{
genomicSequence
:
'
genomic
'
,
proteinSequence
:
'
protein
'
,
cdna
:
'
cdna
'
,
cds
:
'
cds
'
};
if
(
sequenceType
===
'
genomicSequence
'
)
{
return
`https://rest.ensembl.org/sequence/id/
${
data
.
geneId
}
?content-type=text/x-fasta&type=
${
sequenceTypeToContextType
.
genomicSequence
}
`
;
}
else
{
const
contextType
=
sequenceTypeToContextType
[
sequenceType
]
as
keyof
TranscriptChecksums
;
const
checksum
=
data
.
checksums
&&
data
.
checksums
[
contextType
]?.
sequence_checksum
;
const
prepareDownloadParameters
=
(
params
:
PrepareDownloadParametersParams
)
=>
{
return
transcriptOptionsOrder
.
filter
((
option
)
=>
params
.
options
[
option
])
.
map
((
option
)
=>
labelTypeToSequenceType
[
option
])
// 'genomic', 'protein', 'cdna', 'cds'
.
map
((
option
)
=>
{
if
(
option
===
'
genomic
'
)
{
return
getGenomicSequenceData
(
params
.
transcriptId
);
}
else
{
const
dataForSingleSequence
=
params
.
transcriptSequenceData
[
option
];
if
(
!
dataForSingleSequence
)
{
// shouldn't happen; but to keep typescript happy
return
null
;
}
return
{
label
:
dataForSingleSequence
.
label
,
url
:
`/api/refget/sequence/
${
dataForSingleSequence
.
checksum
}
?accept=text/plain`
};
}
})
.
filter
(
Boolean
)
as
SingleSequenceFetchParams
[];
};
return
`/api/refget/sequence/
${
checksum
}
?accept=text/x-fasta`
;
}
const
getGenomicSequenceData
=
(
id
:
string
)
=>
{
return
{
label
:
`
${
id
}
genomic`
,
url
:
`https://rest.ensembl.org/sequence/id/
${
id
}
?content-type=text/plain&type=genomic`
};
};
src/ensembl/src/shared/components/instant-download/instant-download-fetch/fetchSequenceChecksums.ts
View file @
6eb971c5
...
...
@@ -18,21 +18,35 @@ import { gql } from '@apollo/client';
import
{
client
}
from
'
src/gql-client
'
;
export
type
TranscriptChecksums
=
{
cdna
:
{
sequence_checksum
:
string
;
export
type
TranscriptSequenceMetadata
=
{
cdna
?:
{
checksum
:
string
;
label
:
string
;
};
cds
:
{
sequence_checksum
:
string
;
cds
?:
{
checksum
:
string
;
label
:
string
;
};
product
:
{
sequence_checksum
:
string
;
protein
?:
{
checksum
:
string
;
label
:
string
;
};
};
type
GeneFragmen
t
=
{
type
TranscriptQueryResul
t
=
{
transcript
:
{
product_generating_contexts
:
TranscriptChecksums
[];
product_generating_contexts
:
Array
<
{
cdna
:
{
sequence_checksum
:
string
;
};
cds
:
{
sequence_checksum
:
string
;
};
product
:
{
stable_id
:
string
;
sequence_checksum
:
string
;
};
}
>
;
};
};
...
...
@@ -47,6 +61,7 @@ const transcriptChecksumsQuery = gql`
sequence_checksum
}
product {
stable_id
sequence_checksum
}
}
...
...
@@ -59,10 +74,37 @@ type Variables = {
transcriptId
:
string
;
};
export
const
fetchTranscriptChecksums
=
(
variables
:
Variables
)
=>
client
.
query
<
GeneFragment
>
({
export
const
fetchTranscriptSequenceMetadata
=
(
variables
:
Variables
):
Promise
<
TranscriptSequenceMetadata
>
=>
{
const
{
transcriptId
}
=
variables
;
return
client
.
query
<
TranscriptQueryResult
>
({
query
:
transcriptChecksumsQuery
,
variables
})
.
then
(({
data
})
=>
data
.
transcript
.
product_generating_contexts
[
0
]);
.
then
(({
data
})
=>
{
// TODO: expect to fetch genomic sequence here as well when checksum becomes available
const
productGeneratingContext
=
data
.
transcript
.
product_generating_contexts
[
0
];
if
(
!
productGeneratingContext
)
{
return
{};
}
return
{
cdna
:
{
checksum
:
productGeneratingContext
.
cdna
.
sequence_checksum
,
label
:
`
${
transcriptId
}
cdna`
},
cds
:
{
checksum
:
productGeneratingContext
.
cds
.
sequence_checksum
,
label
:
`
${
transcriptId
}
cds`
},
protein
:
{
checksum
:
productGeneratingContext
.
product
.
sequence_checksum
,
label
:
`
${
productGeneratingContext
.
product
.
stable_id
}
pep`
}
};
});
};
src/ensembl/src/shared/helpers/formatters/fastaFormatter.test.ts
0 → 100644
View file @
6eb971c5
import
{
toFasta
,
LINE_LENGTH
}
from
'
./fastaFormatter
'
;
import
random
from
'
lodash/random
'
;
/**
 * Builds a random nucleotide sequence to be used as test input.
 *
 * @param length - number of characters the generated sequence should contain
 * @returns a string of exactly `length` characters drawn from the alphabet "AGCT"
 */
const generateSequence = (length: number) => {
  const alphabet = 'AGCT';
  let sequence = '';

  // strict `<` so that exactly `length` characters are produced
  // (the previous `<=` bound generated one character too many)
  for (let i = 0; i < length; i++) {
    const characterIndex = Math.floor(Math.random() * alphabet.length);
    sequence += alphabet[characterIndex];
  }

  return sequence;
};
describe('fasta formatter', () => {
  it('formats raw sequence in the fasta format', () => {
    // random length so that, across runs, both single-line and multi-line outputs get exercised
    const sequenceLength = random(1, 600);
    const sequenceLabel = 'label for the sequence';
    const rawSequence = generateSequence(sequenceLength);

    const fastaFormattedSequence = toFasta(sequenceLabel, rawSequence);
    const [firstLine, ...sequenceLines] = fastaFormattedSequence.split('\n');

    // the first line is the header: ">" immediately followed by the label
    expect(firstLine).toBe(`>${sequenceLabel}`);

    // no sequence line may be longer than LINE_LENGTH;
    // the matcher is required, a bare expect(...) call asserts nothing
    expect(sequenceLines.every((line) => line.length <= LINE_LENGTH)).toBe(
      true
    );

    // gluing the sequence lines back together must reproduce the input exactly
    expect(sequenceLines.join('')).toBe(rawSequence);
  });
});
src/ensembl/src/shared/helpers/formatters/fastaFormatter.ts
0 → 100644
View file @
6eb971c5
// maximum number of sequence characters per line
// (line length in Ensembl refget implementations)
export const LINE_LENGTH = 60;

/**
 * Formats a raw sequence as a single FASTA record.
 *
 * The record consists of a header line (">" followed by the label) and the
 * sequence split into consecutive lines of at most LINE_LENGTH characters.
 * Lines are joined with "\n"; there is no trailing newline. An empty sequence
 * produces the header line only.
 *
 * @param sequenceLabel - text to put after ">" in the header line
 * @param sequence - raw sequence characters
 * @returns the FASTA-formatted record
 */
export const toFasta = (sequenceLabel: string, sequence: string) => {
  const lines = [`>${sequenceLabel}`];

  // walk the sequence in LINE_LENGTH-sized steps; the last chunk may be shorter
  for (let start = 0; start < sequence.length; start += LINE_LENGTH) {
    lines.push(sequence.slice(start, start + LINE_LENGTH));
  }

  return lines.join('\n');
};
src/ensembl/src/shared/workers/sequenceFetcher.worker.ts
0 → 100644
View file @
6eb971c5
/**
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import
{
expose
}
from
'
comlink
'
;
import
{
toFasta
}
from
'
src/shared/helpers/formatters/fastaFormatter
'
;
/**
 * Parameters for downloading a single sequence:
 * - label: passed to toFasta as the label of the formatted sequence
 * - url: address passed to fetch to retrieve the sequence text
 */
export type SingleSequenceFetchParams = {
  label: string;
  url: string;
};

/** List of sequences to download; this is the argument of downloadSequences. */
export type SequenceFetcherParams = Array<SingleSequenceFetchParams>;
/**
 * Fetches every requested sequence, formats each one as FASTA
 * and combines the results into a single string.
 *
 * All requests are started at once; the output preserves the order of `params`.
 *
 * @param params - label and url for every sequence to download
 * @returns FASTA records separated by a single newline
 * @throws Error if any request completes with a non-OK HTTP status;
 *   rejections from fetch itself (e.g. network failures) propagate unchanged
 */
const downloadSequences = async (params: SequenceFetcherParams) => {
  const sequencePromises = params.map(async ({ label, url }) => {
    const response = await fetch(url);

    // fetch resolves for HTTP error statuses as well; without this check,
    // the body of an error response would be formatted as if it were a sequence
    if (!response.ok) {
      throw new Error(
        `Failed to fetch sequence "${label}" from ${url}: ${response.status} ${response.statusText}`
      );
    }

    const sequence = await response.text();
    return toFasta(label, sequence);
  });

  const sequences = await Promise.all(sequencePromises);

  // start new sequence on a new line; no empty lines allowed in FASTA files
  return sequences.join('\n');
};
// Functions that this worker makes callable through comlink
const workerApi = {
  downloadSequences
};

// Type of the worker's API; fetchForProtein and fetchForTranscript import it
// to parameterise comlink's wrap, e.g. wrap<WorkerApi>(worker)
export type WorkerApi = typeof workerApi;

// Hand the API object over to comlink
expose(workerApi);
src/ensembl/webpack/environments/webpack.common.js
View file @
6eb971c5
const
path
=
require
(
'
path
'
);
const
postcssPresetEnv
=
require
(
'
postcss-preset-env
'
);
const
HtmlPlugin
=
require
(
'
html-webpack-plugin
'
);
const
MiniCssExtractPlugin
=
require
(
'
mini-css-extract-plugin
'
);
const
ForkTsCheckerPlugin
=
require
(
'
fork-ts-checker-webpack-plugin
'
);
const
WorkerPlugin
=
require
(
"
worker-plugin
"
);
const
{
getPaths
}
=
require
(
'
../paths
'
);
const
{
isDevelopment
}
=
require
(
'
./environment-detector
'
);
...
...
@@ -97,7 +97,9 @@ module.exports = (env) => {
filename
:
isDev
?
'
index.html
'
:
'
../index.html
'
,
template
:
paths
.
htmlTemplatePath
,
publicPath
:
'
/
'
})
}),
new
WorkerPlugin
()
],
// configuration that allows us to not to use file extensions and shorten import paths (using aliases)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment