transcript.ts 6.81 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/**
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 18 19 20
import faker from 'faker';
import times from 'lodash/times';

import { createSlice } from './slice';
21
import { createProduct } from './product';
22
import { createExternalReference } from './external-reference';
23 24
import { getFeatureCoordinates } from 'src/content/app/entity-viewer/shared/helpers/entity-helpers';

25
import { FullTranscript } from 'src/shared/types/thoas/transcript';
Manoj Pandian Sakthivel's avatar
Manoj Pandian Sakthivel committed
26 27
import { Exon, SplicedExon, PhasedExon } from 'src/shared/types/thoas/exon';
import { Slice } from 'src/shared/types/thoas/slice';
28
import { FullCDS } from 'src/shared/types/thoas/cds';
Manoj Pandian Sakthivel's avatar
Manoj Pandian Sakthivel committed
29
import { CDNA } from 'src/shared/types/thoas/cdna';
30
import { FullProductGeneratingContext } from 'src/shared/types/thoas/productGeneratingContext';
Manoj Pandian Sakthivel's avatar
Manoj Pandian Sakthivel committed
31 32
import { ProductType } from 'src/shared/types/thoas/product';
import { ExternalReference } from 'src/shared/types/thoas/externalReference';
33
import { TranscriptMetadata } from 'ensemblRoot/src/shared/types/thoas/metadata';
34

35 36 37 38 39
/**
 * A FullProductGeneratingContext whose `cds` field is replaced by one that is
 * typed as FullCDS.
 */
type ProteinCodingProductGeneratingContext = Omit<
  FullProductGeneratingContext,
  'cds'
> & { cds: FullCDS };

40 41
/**
 * A FullTranscript without its `gene` field, and with its
 * `product_generating_contexts` narrowed to contexts whose `cds` is typed as
 * FullCDS.
 */
export type ProteinCodingTranscript = Omit<
  FullTranscript,
  'gene' | 'product_generating_contexts'
> & {
  product_generating_contexts: ProteinCodingProductGeneratingContext[];
};

47
/**
 * Creates a protein-coding transcript populated with generated fake data.
 *
 * A new slice is created for the transcript; the exons, the spliced exons and
 * the single product-generating context are all derived from that slice.
 *
 * @param fragment - fields that should replace the generated ones; they are
 *   spread last, so they take precedence over every default built here
 * @returns a complete ProteinCodingTranscript object
 */
export const createTranscript = (
  fragment: Partial<ProteinCodingTranscript> = {}
): ProteinCodingTranscript => {
  const transcriptSlice = createSlice();

  const unversionedStableId = faker.datatype.uuid();
  const version = 1;
  const stableId = `${unversionedStableId}.${version}`;

  const exons = createExons(transcriptSlice);
  const transcriptLength = transcriptSlice.location.length;

  return {
    type: 'Transcript',
    stable_id: stableId,
    unversioned_stable_id: unversionedStableId,
    version,
    symbol: faker.lorem.word(),
    slice: transcriptSlice,
    external_references: createExternalReferences(),
    relative_location: {
      start: 1,
      // With start fixed at 1, a span of `length` positions ends at `length`
      // (end - start + 1 === length). The slice's own `end` is an absolute
      // coordinate and must not be mixed into the relative location.
      end: transcriptLength,
      length: transcriptLength
    },
    spliced_exons: createSplicedExons(transcriptSlice, exons),
    product_generating_contexts: [
      createProductGeneratingContext(transcriptSlice, exons)
    ],
    metadata: createTranscriptMetadata(),
    ...fragment
  };
};

80 81 82
/**
 * Creates transcript metadata with a randomly worded biotype and every other
 * field set to null.
 *
 * @param fragment - optional fields that replace the generated defaults
 * @returns a TranscriptMetadata object
 */
export const createTranscriptMetadata = (
  fragment?: Partial<TranscriptMetadata>
): TranscriptMetadata => {
  const biotype = {
    label: faker.lorem.word(),
    value: faker.lorem.word(),
    definition: faker.lorem.sentence()
  };

  const defaults = {
    biotype,
    canonical: null,
    mane: null,
    gencode_basic: null,
    tsl: null,
    appris: null
  };

  // fields from the fragment are spread last so that they override the defaults
  return { ...defaults, ...fragment };
};

Manoj Pandian Sakthivel's avatar
Manoj Pandian Sakthivel committed
98
/**
 * Creates a list of between 1 and 10 external references; the exact count is
 * picked at random.
 */
const createExternalReferences = (): ExternalReference[] => {
  const referenceCount = faker.datatype.number({ min: 1, max: 10 });

  // The wrapping arrow function keeps the index that `times` passes to its
  // callback from being forwarded to createExternalReference.
  return times(referenceCount, () => {
    return createExternalReference();
  });
};

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
/**
 * Wraps every exon in a SplicedExon that carries the exon's 1-based index and
 * its location counted from the start of the transcript.
 *
 * @param transcriptSlice - slice of the transcript that the exons belong to
 * @param exons - exons of the transcript; expected to already be sorted by
 *   their location
 * @returns one SplicedExon per exon, in the same order as the input
 */
const createSplicedExons = (
  transcriptSlice: Slice,
  exons: Exon[]
): SplicedExon[] => {
  const transcriptStart = getFeatureCoordinates({
    slice: transcriptSlice
  }).start;

  // turns a coordinate into a 1-based position relative to the transcript start
  const toRelativePosition = (coordinate: number) =>
    coordinate - transcriptStart + 1;

  return exons.map((exon, position) => {
    const exonCoordinates = getFeatureCoordinates({ slice: exon.slice });
    const start = toRelativePosition(exonCoordinates.start);
    const end = toRelativePosition(exonCoordinates.end);

    return {
      index: position + 1,
      relative_location: {
        start,
        end,
        length: end - start + 1
      },
      exon
    };
  });
};

/**
 * Wraps every exon in a PhasedExon that carries the exon's 1-based index.
 * Both the start phase and the end phase are always set to -1.
 *
 * @param exons - exons to wrap
 * @returns one PhasedExon per exon, in the same order as the input
 */
const createPhasedExons = (exons: Exon[]): PhasedExon[] => {
  const placeholderPhase = -1;
  const phasedExons: PhasedExon[] = [];

  exons.forEach((exon, position) => {
    phasedExons.push({
      index: position + 1,
      start_phase: placeholderPhase,
      end_phase: placeholderPhase,
      exon
    });
  });

  return phasedExons;
};

141
/**
 * Generates between 1 and 10 exons positioned along the transcript.
 *
 * The transcript is divided into as many equally sized windows as there are
 * exons, and each exon gets random start and end coordinates inside its own
 * window. The first exon always starts at the transcript start, and the last
 * exon always ends at the transcript end.
 *
 * @param transcriptSlice - slice of the transcript to place the exons on
 * @returns the generated exons, ordered by window
 */
const createExons = (transcriptSlice: Slice): Exon[] => {
  const { start: transcriptStart, end: transcriptEnd } = getFeatureCoordinates({
    slice: transcriptSlice
  });
  const transcriptLength = transcriptEnd - transcriptStart + 1;

  const numberOfExons = faker.datatype.number({ min: 1, max: 10 });
  const windowSize = Math.floor(transcriptLength / numberOfExons);
  const lastExonIndex = numberOfExons - 1;

  return times(numberOfExons, (index: number) => {
    const windowStart = transcriptStart + (windowSize * index + 1);
    const windowEnd = windowStart + windowSize;
    const windowMiddle = windowEnd - (windowEnd - windowStart) / 2;

    // Both random coordinates are always drawn, in this order, even when the
    // first or the last exon ends up ignoring one of them.
    const randomStart = faker.datatype.number({
      min: windowStart,
      max: windowMiddle
    });
    const randomEnd = faker.datatype.number({
      min: windowMiddle + 1,
      max: windowEnd - 1
    });

    const isFirstExon = index === 0;
    const isLastExon = index === lastExonIndex;
    const start = isFirstExon ? transcriptStart : randomStart;
    const end = isLastExon ? transcriptEnd : randomEnd;

    const location = {
      start,
      end,
      length: end - start + 1
    };

    return {
      stable_id: faker.datatype.uuid(),
      slice: {
        location,
        strand: transcriptSlice.strand,
        region: transcriptSlice.region
      }
    };
  });
};

183
/**
 * Creates a CDS that covers the whole of the transcript slice.
 *
 * The nucleotide length is the number of positions between the slice's start
 * and end coordinates (inclusive); the protein length is a third of that,
 * rounded down.
 *
 * @param transcriptSlice - slice of the transcript that the CDS is created for
 * @returns a FullCDS with a random sequence checksum
 */
const createCDS = (transcriptSlice: Slice): FullCDS => {
  const coordinates = getFeatureCoordinates({ slice: transcriptSlice });
  const spanLength = coordinates.end - coordinates.start + 1;

  return {
    start: coordinates.start,
    end: coordinates.end,
    relative_start: 1,
    relative_end: spanLength,
    nucleotide_length: spanLength,
    protein_length: Math.floor(spanLength / 3),
    sequence: {
      checksum: faker.datatype.uuid()
    }
  };
};

/**
 * Creates a cDNA that covers the whole of the transcript slice.
 *
 * @param transcriptSlice - slice of the transcript that the cDNA is created for
 * @returns a CDNA with the slice's start and end coordinates, the inclusive
 *   length between them, and a random sequence checksum
 */
const createCDNA = (transcriptSlice: Slice): CDNA => {
  const coordinates = getFeatureCoordinates({ slice: transcriptSlice });
  const cdnaStart = coordinates.start;
  const cdnaEnd = coordinates.end;

  return {
    start: cdnaStart,
    end: cdnaEnd,
    length: cdnaEnd - cdnaStart + 1,
    sequence: {
      checksum: faker.datatype.uuid()
    }
  };
};

/**
 * Creates the default, protein-producing product-generating context of a
 * transcript. Both UTRs are set to null.
 *
 * @param transcriptSlice - slice of the transcript; used for the CDS, the cDNA
 *   and the length of the product
 * @param exons - exons of the transcript; used for the phased exons
 * @returns a product-generating context with a CDS
 */
const createProductGeneratingContext = (
  transcriptSlice: Slice,
  exons: Exon[]
): ProteinCodingProductGeneratingContext => {
  // the parts are built in the same order in which they appear in the result
  const cds = createCDS(transcriptSlice);
  const cdna = createCDNA(transcriptSlice);
  const phasedExons = createPhasedExons(exons);
  const productLength = Math.floor(transcriptSlice.location.length / 3);
  const product = createProduct({ length: productLength });

  return {
    product_type: ProductType.PROTEIN,
    default: true,
    cds,
    five_prime_utr: null,
    three_prime_utr: null,
    cdna,
    phased_exons: phasedExons,
    product
  };
};