views.py 30 KB
Newer Older
1
from __future__ import print_function
2
"""
3
Copyright [2009-2017] EMBL-European Bioinformatics Institute
4 5 6 7 8 9 10 11 12 13
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
14
import re
15 16
import requests
import zlib
17
from itertools import chain
18

19
from django.http import Http404, HttpResponse
20
from django.shortcuts import get_object_or_404
21
from django_filters import rest_framework as filters
22
from rest_framework import generics, renderers, status
23
from rest_framework.mixins import RetrieveModelMixin, ListModelMixin
24
from rest_framework.response import Response
25
from rest_framework.views import APIView
26
from rest_framework.viewsets import GenericViewSet
27

28 29
from rest_framework.permissions import AllowAny
from rest_framework.reverse import reverse
30 31
from rest_framework_jsonp.renderers import JSONPRenderer
from rest_framework_yaml.renderers import YAMLRenderer
32

33
from apiv1.serializers import RnaNestedSerializer, AccessionSerializer, CitationSerializer, XrefSerializer, \
34
                              RnaFlatSerializer, RnaFastaSerializer, RnaGffSerializer, RnaGff3Serializer, RnaBedSerializer, \
35
                              RnaSpeciesSpecificSerializer, ExpertDatabaseStatsSerializer, \
36
                              RawPublicationSerializer, RnaSecondaryStructureSerializer, \
37
                              RfamHitSerializer, SequenceFeatureSerializer, \
Anton Petrov's avatar
Anton Petrov committed
38
                              EnsemblAssemblySerializer, ProteinTargetsSerializer, \
39
                              LncrnaTargetsSerializer, EnsemblComparaSerializer, SecondaryStructureSVGImageSerializer
40

Carlos Eduardo Ribas's avatar
Carlos Eduardo Ribas committed
41
from apiv1.renderers import RnaFastaRenderer
42
from portal.models import Rna, RnaPrecomputed, Accession, Xref, Database, DatabaseStats, RfamHit, EnsemblAssembly,\
Anton Petrov's avatar
Anton Petrov committed
43
    GoAnnotation, RelatedSequence, ProteinInfo, SequenceFeature,\
44
    SequenceRegion, EnsemblCompara, SecondaryStructureWithLayout
45
from portal.config.expert_databases import expert_dbs
46
from rnacentral.utils.pagination import Pagination, PaginatedRawQuerySet
47

48 49 50
from colorhash import ColorHash


51 52 53 54
"""
Docstrings of the classes exposed in urlpatterns support markdown.
"""

55 56 57 58
# Maximum number of xrefs an Rna may have for its xrefs to be loaded with
# prefetch_related. Views in this module compare `rna.xrefs.count()` against
# this value and skip prefetching for entries above it.
MAX_XREFS_TO_PREFETCH = 1000


59
class GenomeAnnotations(APIView):
    """
    Ensembl-like genome coordinates endpoint.

    [API documentation](/api)
    """
    # the above docstring appears on the API website

    permission_classes = (AllowAny,)

    def get(self, request, species, chromosome, start, end, format=None):
        # Returns a flat list of features: every active transcript lying
        # entirely within start..end on `chromosome` of the assembly whose
        # ensembl_url equals `species`, each followed by its exons.
        # Coordinates may contain thousands separators (e.g. "1,000,000").
        start = start.replace(',', '')
        end = end.replace(',', '')

        # `.first()` returns None (it never raises DoesNotExist) when nothing
        # matches, so an unknown species has to be detected explicitly.
        assembly = EnsemblAssembly.objects.filter(ensembl_url=species).first()
        if assembly is None:
            return Response([])

        regions = SequenceRegion.objects\
            .select_related('urs_taxid')\
            .prefetch_related('exons')\
            .filter(
                assembly=assembly,
                chromosome=chromosome,
                region_start__gte=start,
                region_stop__lte=end,
                urs_taxid__is_active=True
            )

        features = []
        for transcript in regions:
            features.append({
                'ID': transcript.region_name,
                'external_name': transcript.urs_taxid.id.split('_')[0],
                'taxid': assembly.taxid,  # added by Burkov for generating links to E! in Genoverse populateMenu() popups
                'feature_type': 'transcript',
                'logic_name': 'RNAcentral',  # required by Genoverse
                'biotype': transcript.urs_taxid.rna_type,  # required by Genoverse
                'description': transcript.urs_taxid.short_description,
                'seq_region_name': transcript.chromosome,
                'strand': transcript.strand,
                'start': transcript.region_start,
                'end': transcript.region_stop,
                'databases': transcript.providing_databases
            })

            # exons are emitted right after their parent transcript and
            # reference it through the 'Parent' key
            for exon in transcript.exons.all():
                features.append({
                    'external_name': exon.id,
                    'ID': exon.id,
                    'taxid': assembly.taxid,  # added by Burkov for generating links to E! in Genoverse populateMenu() popups
                    'feature_type': 'exon',
                    'Parent': transcript.region_name,
                    'logic_name': 'RNAcentral',  # required by Genoverse
                    'biotype': transcript.urs_taxid.rna_type,  # required by Genoverse
                    'seq_region_name': transcript.chromosome,
                    'strand': transcript.strand,
                    'start': exon.exon_start,
                    'end': exon.exon_stop,
                })

        return Response(features)
Anton Petrov's avatar
Anton Petrov committed
123 124


125 126 127 128 129 130 131 132 133 134 135
class APIRoot(APIView):
    """
    This is the root of the RNAcentral API Version 1.

    [API documentation](/api)
    """
    # the above docstring appears on the API website
    permission_classes = (AllowAny,)

    # `format` defaults to None like every other handler in this module; the
    # previous default (`format=format`) silently bound the builtin function.
    def get(self, request, format=None):
        # Single-entry index pointing at the sequence listing endpoint.
        return Response({
            'rna': reverse('rna-sequences', request=request),
        })


140
class RnaFilter(filters.FilterSet):
    """Query-string filters available on Rna list endpoints (django-filters)."""

    # inclusive bounds on the `length` field
    min_length = filters.NumberFilter(name="length", lookup_expr='gte')
    max_length = filters.NumberFilter(name="length", lookup_expr='lte')

    # filters that traverse xrefs -> accession
    external_id = filters.CharFilter(name="xrefs__accession__external_id", distinct=True)
    database = filters.CharFilter(name="xrefs__accession__database")

    class Meta:
        model = Rna
        fields = [
            'upi',
            'md5',
            'length',
            'min_length',
            'max_length',
            'external_id',
            'database',
        ]
150 151


152
class RnaMixin(object):
    """Shared helpers for the views that serve Rna objects."""

    def get_serializer_class(self):
        """
        Choose the serializer for RnaSequences and RnaDetail views.

        The negotiated renderer format wins when it is one of the export
        formats (fasta, gff, gff3, bed). Otherwise the `flat` query parameter
        selects between the flat and the nested serializer.
        """
        export_serializers = {
            'fasta': RnaFastaSerializer,
            'gff': RnaGffSerializer,
            'gff3': RnaGff3Serializer,
            'bed': RnaBedSerializer,
        }
        renderer_format = self.request.accepted_renderer.format
        if renderer_format in export_serializers:
            return export_serializers[renderer_format]

        # any value starting with "true" (case-insensitive) enables flat mode
        flat_param = self.request.query_params.get('flat', 'false')
        wants_flat = re.match('true', flat_param, re.IGNORECASE)
        return RnaFlatSerializer if wants_flat else RnaNestedSerializer
169

170

Anton Petrov's avatar
Anton Petrov committed
171
class RnaSequences(RnaMixin, generics.ListAPIView):
    """
    Unique RNAcentral Sequences

    [API documentation](/api)
    """
    # the above docstring appears on the API website
    permission_classes = (AllowAny,)
    filter_class = RnaFilter
    renderer_classes = (renderers.JSONRenderer, JSONPRenderer,
                        renderers.BrowsableAPIRenderer,
                        YAMLRenderer, RnaFastaRenderer)
    pagination_class = Pagination

    def list(self, request, *args, **kwargs):
        """
        List view in Django Rest Framework is responsible
        for displaying entries from the queryset.
        Here the view is overridden in order to avoid
        performance bottlenecks.

        * estimate the number of xrefs for each Rna
        * prefetch_related only for Rnas with a small number of xrefs
        * do not attempt to optimise entries with a large number of xrefs
          letting Django hit the database one time for each xref
        * flat serializer limits the total number of displayed xrefs
        """
        # begin DRF base code
        queryset = self.filter_queryset(self.get_queryset())

        page = self.paginate_queryset(queryset)
        # end DRF base code

        # begin RNAcentral override: use prefetch_related where possible
        flat = self.request.query_params.get('flat', None)
        # `page` is None when pagination is not applied; in that case there
        # is nothing to re-fetch and the unpaginated DRF path below is used.
        if flat and page is not None:
            to_prefetch = []
            no_prefetch = []
            for rna in page:
                if rna.xrefs.count() <= MAX_XREFS_TO_PREFETCH:
                    to_prefetch.append(rna.upi)
                else:
                    no_prefetch.append(rna.upi)

            prefetched = self.filter_queryset(Rna.objects.filter(upi__in=to_prefetch).prefetch_related('xrefs__accession').all())
            not_prefetched = self.filter_queryset(Rna.objects.filter(upi__in=no_prefetch).all())

            result_list = list(chain(prefetched, not_prefetched))
            page.object_list = result_list  # override data while keeping the rest of the pagination object
        # end RNAcentral override

        # begin DRF base code
        if page is not None:
            serializer = self.get_serializer(page, many=True)
            return self.get_paginated_response(serializer.data)

        serializer = self.get_serializer(queryset, many=True)
        return Response(serializer.data)
        # end DRF base code

    def _get_database_id(self, db_name):
        """
        Map the `database` parameter from the url to internal database ids.

        Each Database label is used as a case-insensitive pattern matched at
        the start of `db_name`; the id of the first match is returned, or
        None when no label matches.
        """
        for expert_database in Database.objects.all():
            if re.match(expert_database.label, db_name, re.IGNORECASE):
                return expert_database.id
        return None

    def get_queryset(self):
        """
        Manually filter against the `database` query parameter,
        use RnaFilter for other filtering operations.

        An unrecognised `database` value yields an empty queryset.
        """
        db_name = self.request.query_params.get('database', None)
        # `seq_long` **must** be deferred in order for filters to work
        queryset = Rna.objects.defer('seq_long')
        if db_name:
            db_id = self._get_database_id(db_name)
            if db_id:
                return queryset.filter(xrefs__db=db_id).distinct().all()
            else:
                return Rna.objects.none()
        return queryset.all()
253 254


255
class RnaDetail(RnaMixin, generics.RetrieveAPIView):
    """
    Unique RNAcentral Sequence

    [API documentation](/api)
    """
    # the above docstring appears on the API website
    queryset = Rna.objects.all()
    renderer_classes = (
        renderers.JSONRenderer, JSONPRenderer, renderers.BrowsableAPIRenderer, YAMLRenderer, RnaFastaRenderer
    )

    def get_object(self):
        """
        Prefetch related objects only when `flat=True`
        and the number of xrefs is not too large.

        Raises Http404 when no Rna matches the lookup kwarg, and runs the
        object-level permission checks that an overridden `get_object`
        is expected to perform.
        """
        queryset = self.filter_queryset(self.get_queryset())

        # Perform the lookup filtering.
        lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field
        filter_kwargs = {self.lookup_field: self.kwargs[lookup_url_kwarg]}
        rna = get_object_or_404(queryset, **filter_kwargs)

        flat = self.request.query_params.get('flat', None)
        if flat and rna.xrefs.count() <= MAX_XREFS_TO_PREFETCH:
            # re-fetch the same object with its xrefs and accessions prefetched
            queryset = queryset.prefetch_related('xrefs', 'xrefs__accession')
            rna = get_object_or_404(queryset, **filter_kwargs)

        # May raise a permission denied, mirroring DRF's default get_object.
        self.check_object_permissions(self.request, rna)
        return rna
285

286

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
class RnaSpeciesSpecificView(generics.RetrieveAPIView):
    """
    API endpoint for retrieving species-specific details
    about Unique RNA Sequences.

    [API documentation](/api)
    """
    # the above docstring appears on the API website

    """
    This endpoint is used by Protein2GO.
    Contact person: Tony Sawford.
    """
    queryset = Rna.objects.all()

    def get(self, request, pk, taxid, format=None):
        # Serialize the looked-up Rna together with its xrefs for `taxid`;
        # an Rna without any xref in that species is reported as 404.
        rna = self.get_object()
        species_xrefs = rna.xrefs.filter(taxid=taxid)
        if species_xrefs:
            serializer_context = {
                'request': request,
                'xrefs': species_xrefs,
                'taxid': taxid,
            }
            payload = RnaSpeciesSpecificSerializer(rna, context=serializer_context).data
            return Response(payload)
        raise Http404


315
class XrefList(generics.ListAPIView):
    """
    List of cross-references for a particular RNA sequence.

    [API documentation](/api)
    """
    serializer_class = XrefSerializer
    pagination_class = Pagination

    def get_queryset(self):
        # Xrefs of the Rna named by the `pk` URL kwarg. An unknown id is
        # answered with 404 instead of an unhandled Rna.DoesNotExist (500).
        upi = self.kwargs['pk']
        return get_object_or_404(Rna, upi=upi).get_xrefs()
327 328


329
class XrefsSpeciesSpecificList(generics.ListAPIView):
    """
    List of cross-references for a particular RNA sequence in a specific species.

    [API documentation](/api)
    """
    serializer_class = XrefSerializer
    pagination_class = Pagination

    def get_queryset(self):
        # Xrefs of the Rna named by `pk`, restricted to `taxid`. An unknown
        # id is answered with 404 instead of an unhandled
        # Rna.DoesNotExist (500).
        upi = self.kwargs['pk']
        taxid = self.kwargs['taxid']
        return get_object_or_404(Rna, upi=upi).get_xrefs(taxid=taxid)
342 343


344
class SecondaryStructureSpeciesSpecificList(generics.ListAPIView):
    """
    List of secondary structures for a particular RNA sequence in a specific species.

    [API documentation](/api)
    """
    queryset = Rna.objects.all()

    def get(self, request, pk=None, taxid=None, format=None):
        """Get a list of secondary structures"""
        # the Rna is resolved from the URL kwargs by get_object()
        structures = RnaSecondaryStructureSerializer(self.get_object())
        return Response(structures.data)


359 360 361 362 363 364 365 366
class SecondaryStructureSVGImage(generics.ListAPIView):
    """
    SVG image for an RNA sequence.
    """
    serializer_class = SecondaryStructureSVGImageSerializer
    permission_classes = (AllowAny,)

    # seconds to wait for the FTP mirror before falling back to the database
    FTP_TIMEOUT = 10

    def get(self, request, pk=None, format=None):
        # Serve a thumbnail built from the gzipped SVG on the FTP mirror,
        # falling back to the layout stored in the database. Responds with
        # 404 when neither source has a usable image.
        ftp = "http://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/.secondary-structure/secondary-structure/{}.svg.gz"
        upi = self.kwargs['pk']
        # files are sharded by id prefix: <0:3>/<3:5>/<5:7>/<7:9>/<9:11>/<upi>
        upi_path = "/".join([upi[0:3], upi[3:5], upi[5:7], upi[7:9], upi[9:11]]) + "/"
        url = ftp.format(upi_path + upi)

        svg = self._fetch_svg_from_ftp(url)
        if not svg:
            # the database is only queried when the FTP copy is unavailable
            svg = self._fetch_svg_from_database(upi)

        if not svg:
            return Response(status=status.HTTP_404_NOT_FOUND)

        try:
            thumbnail = self.generate_thumbnail(svg, upi)
        except ValueError:
            # the stored image has no dimensions or no nucleotide labels
            return Response(status=status.HTTP_404_NOT_FOUND)

        return HttpResponse(thumbnail, content_type='image/svg+xml')

    def _fetch_svg_from_ftp(self, url):
        """Return the decompressed SVG downloaded from `url`, or None on any failure."""
        try:
            response = requests.get(url, timeout=self.FTP_TIMEOUT)
            response.raise_for_status()
            # MAX_WBITS | 32 lets zlib auto-detect the gzip/zlib header
            return zlib.decompress(response.content, zlib.MAX_WBITS | 32)
        except (requests.exceptions.RequestException, zlib.error):
            # covers HTTP errors, connection problems, timeouts and corrupt archives
            return None

    def _fetch_svg_from_database(self, upi):
        """Return the layout stored for `upi`, or None when there is no such row."""
        try:
            return SecondaryStructureWithLayout.objects.get(urs=upi).layout
        except SecondaryStructureWithLayout.DoesNotExist:
            return None

    def generate_thumbnail(self, image, upi):
        """
        Build a simplified SVG that traces the nucleotide positions of `image`.

        :param image: source SVG markup (text, or UTF-8 encoded bytes)
        :param upi: sequence id; hashed with ColorHash to pick the stroke colour
        :returns: SVG markup with a single path through the <text> positions
        :raises ValueError: if the source has no width/height attribute
                            or no nucleotide <text> elements
        """
        if isinstance(image, bytes):
            # zlib.decompress returns bytes, which cannot be split with a str separator
            image = image.decode('utf-8')

        color = ColorHash(upi).hex
        width = None
        height = None
        points = []
        for line in image.split('\n'):
            # the first width/height attributes seen in the document are used
            if width is None:
                found = re.search(r'width="(\d+(\.\d+)?)"', line)
                if found:
                    width = found.group(1)
            if height is None:
                found = re.search(r'height="(\d+(\.\d+)?)"', line)
                if found:
                    height = found.group(1)
            for nt in re.finditer(r'<text x="(\d+)(\.\d+)?" y="(\d+)(\.\d+)?".*?</text>', line):
                if 'numbering-label' in nt.group(0):
                    continue
                # only the integer part of each coordinate is kept
                points.append((nt.group(1), nt.group(3)))

        if width is None or height is None or not points:
            raise ValueError('SVG lacks dimensions or nucleotide positions')

        if len(points) < 200:
            stroke_width = '3'
        elif len(points) < 3000:
            stroke_width = '4'
        else:
            stroke_width = '2'

        thumbnail = '<svg xmlns="http://www.w3.org/2000/svg" width="{}" height="{}"><path style="stroke:{};stroke-width:{}px;fill:none;" d="'.format(width, height, color, stroke_width)
        # move to the first point, then draw a line through every point
        thumbnail += 'M{} {} '.format(*points[0])
        thumbnail += ' '.join('L{} {}'.format(x, y) for x, y in points)
        thumbnail += '"/></svg>'
        return thumbnail
426 427


428
class RnaGenomeLocations(generics.ListAPIView):
    """
    List of distinct genomic locations, where a specific RNA
    is found in a specific species, extracted from xrefs.

    [API documentation](/api)
    """
    queryset = Rna.objects.select_related().all()

    def get(self, request, pk=None, taxid=None, format=None):
        # if assembly with this taxid is not found, just return empty locations list
        try:
            assembly = EnsemblAssembly.objects.get(taxid=taxid)  # this applies only to species-specific pages
        except EnsemblAssembly.DoesNotExist:
            return Response([])

        rna = self.get_object()
        urs_taxid = rna.upi + "_" + str(assembly.taxid)

        # Filtering on the id directly yields an empty list when there is no
        # precomputed entry for this sequence/species pair, instead of an
        # unhandled RnaPrecomputed.DoesNotExist.
        regions = SequenceRegion.objects.filter(urs_taxid__id=urs_taxid)

        # chromosome names given a 'chr' prefix in addition to those that
        # start with a digit
        prefixed_chromosomes = ('X', 'Y')

        output = []
        for region in regions:
            chromosome = region.chromosome
            if re.match(r'\d+', chromosome) or chromosome in prefixed_chromosomes:
                ucsc_chromosome = 'chr' + chromosome
            else:
                ucsc_chromosome = chromosome

            output.append({
                'chromosome': chromosome,
                'strand': region.strand,
                'start': region.region_start,
                'end': region.region_stop,
                'identity': region.identity,
                'species': assembly.ensembl_url,
                'ucsc_db_id': assembly.assembly_ucsc,
                'ensembl_division': {
                    'name': assembly.division,
                    'url': 'http://' + assembly.subdomain
                },
                'ensembl_species_url': assembly.ensembl_url,
                'ucsc_chromosome': ucsc_chromosome,
            })

        return Response(output)


476
class AccessionView(generics.RetrieveAPIView):
    """
    API endpoint that allows single accessions to be viewed.

    [API documentation](/api)
    """
    # the above docstring appears on the API website

    # serializer used to render the retrieved accession
    serializer_class = AccessionSerializer
    # select_related() without arguments follows the accession's foreign keys
    queryset = Accession.objects.select_related().all()
485

486

487
class CitationsView(generics.ListAPIView):
    """
    API endpoint that allows the citations associated with
    a particular cross-reference to be viewed.

    [API documentation](/api)
    """
    serializer_class = CitationSerializer

    def get_queryset(self):
        # References attached to the accession named by `pk`. An unknown
        # accession is answered with 404 instead of an unhandled
        # Accession.DoesNotExist (500).
        pk = self.kwargs['pk']
        accession = get_object_or_404(Accession.objects.select_related(), pk=pk)
        return accession.refs.all()
499

500

501
class RnaPublicationsView(generics.ListAPIView):
    """
    API endpoint that allows the citations associated with
    each Unique RNA Sequence to be viewed.

    [API documentation](/api)
    """
    # the above docstring appears on the API website
    permission_classes = (AllowAny, )
    serializer_class = RawPublicationSerializer
    pagination_class = Pagination

    def get_queryset(self):
        # Publications of the Rna named by `pk`, optionally restricted to the
        # `taxid` URL kwarg when the route provides one. An unknown id is
        # answered with 404 instead of an unhandled Rna.DoesNotExist (500).
        upi = self.kwargs['pk']
        taxid = self.kwargs.get('taxid')
        return get_object_or_404(Rna, upi=upi).get_publications(taxid)  # this is actually a list
517

518 519 520 521 522 523 524 525 526 527 528

class ExpertDatabasesAPIView(APIView):
    """
    API endpoint describing expert databases, comprising RNAcentral.

    [API documentation](/api)
    """
    permission_classes = ()
    authentication_classes = ()

    def get(self, request, format=None):
        """The data from configuration JSON and database are combined here."""
        def _normalize_expert_db_label(expert_db_label):
            """Capitalizes db label (and accounts for special cases)"""
            if re.match('tmrna-website', expert_db_label, flags=re.IGNORECASE):
                expert_db_label = 'TMRNA_WEB'
            else:
                expert_db_label = expert_db_label.upper()
            return expert_db_label

        # e.g. { "TMRNA_WEB": {'name': 'tmRNA Website', 'label': 'tmrna-website', ...}}
        databases = {db['descr']: db for db in Database.objects.values()}

        # Merge Database table rows into copies of the config entries. The
        # module-level `expert_dbs` must stay untouched: updating it in place
        # would leak database values (possibly including `label`, which is
        # the merge key) into every later request.
        combined = []
        for config_entry in expert_dbs:
            entry = dict(config_entry)
            normalized_label = _normalize_expert_db_label(config_entry['label'])
            if normalized_label in databases:
                entry.update(databases[normalized_label])
            combined.append(entry)

        return Response(combined)

549 550
    # def get_queryset(self):
    #     expert_db_name = self.kwargs['expert_db_name']
551
    #     return Database.objects.get(expert_db_name).references
552 553


554
class ExpertDatabasesStatsViewSet(RetrieveModelMixin, ListModelMixin, GenericViewSet):
    """
    API endpoint with statistics of databases, comprising RNAcentral.

    [API documentation](/api)
    """
    serializer_class = ExpertDatabaseStatsSerializer
    queryset = DatabaseStats.objects.all()
    lookup_field = 'pk'

    def list(self, request, *args, **kwargs):
        # delegates unchanged to the inherited implementation
        inherited = super(ExpertDatabasesStatsViewSet, self)
        return inherited.list(request, *args, **kwargs)

    def retrieve(self, request, *args, **kwargs):
        # delegates unchanged to the inherited implementation
        inherited = super(ExpertDatabasesStatsViewSet, self)
        return inherited.retrieve(request, *args, **kwargs)

570

571
class GenomesAPIViewSet(ListModelMixin, GenericViewSet):
    """API endpoint, presenting all E! assemblies, available in RNAcentral."""
    permission_classes = (AllowAny, )
    pagination_class = Pagination
    serializer_class = EnsemblAssemblySerializer
    # assemblies are listed in descending ensembl_url order
    queryset = EnsemblAssembly.objects.order_by('-ensembl_url')
    lookup_field = 'ensembl_url'
578 579


580
class RfamHitsAPIViewSet(generics.ListAPIView):
    """API endpoint with Rfam models that are found in an RNA."""
    permission_classes = (AllowAny, )
    serializer_class = RfamHitSerializer
    pagination_class = Pagination

    def get_queryset(self):
        # Rfam hits of the sequence named by `pk`, with the Rfam model and
        # the sequence itself fetched in the same query.
        hits = RfamHit.objects.filter(upi=self.kwargs['pk'])
        return hits.select_related('rfam_model', 'upi')

    def get_serializer_context(self):
        # The serializer only receives `taxid`, and only when the route has it.
        context = {}
        if 'taxid' in self.kwargs:
            context['taxid'] = self.kwargs['taxid']
        return context
592 593


594
class SequenceFeaturesAPIViewSet(generics.ListAPIView):
    """API endpoint with sequence features (CRS, mature miRNAs etc)"""
    permission_classes = (AllowAny, )
    serializer_class = SequenceFeatureSerializer
    pagination_class = Pagination

    def get_queryset(self):
        # Only these two feature types are exposed by the endpoint.
        exposed_features = ["conserved_rna_structure", "mature_product"]
        return SequenceFeature.objects.filter(
            upi=self.kwargs['pk'],
            taxid=self.kwargs['taxid'],
            feature_name__in=exposed_features
        )
Anton Petrov's avatar
Anton Petrov committed
604 605


606 607 608 609 610 611
class RnaGoAnnotationsView(APIView):
    permission_classes = (AllowAny, )
    pagination_class = Pagination

    def get(self, request, pk, taxid, **kwargs):
        # GO annotations are keyed by the species-specific id "<pk>_<taxid>";
        # the response reports taxid as an integer.
        species_specific_id = pk + '_' + taxid
        taxid = int(taxid)

        annotations = GoAnnotation.objects.filter(rna_id=species_specific_id)
        annotations = annotations.select_related('ontology_term', 'evidence_code')

        # NOTE(review): 'extensions' is filled from `assigned_by`, same as the
        # key above it - looks like a copy-paste slip; confirm against the
        # GoAnnotation model before changing.
        result = [
            {
                'rna_id': annotation.rna_id,
                'upi': pk,
                'taxid': taxid,
                'go_term_id': annotation.ontology_term.ontology_term_id,
                'go_term_name': annotation.ontology_term.name,
                'qualifier': annotation.qualifier,
                'evidence_code_id': annotation.evidence_code.ontology_term_id,
                'evidence_code_name': annotation.evidence_code.name,
                'assigned_by': annotation.assigned_by,
                'extensions': annotation.assigned_by or {},
            }
            for annotation in annotations
        ]

        return Response(result)
632 633


634 635 636 637 638 639 640 641 642 643 644 645
class EnsemblKaryotypeAPIView(APIView):
    """API endpoint, presenting E! karyotype for a given species."""
    permission_classes = ()
    authentication_classes = ()

    def get(self, request, ensembl_url):
        # `.first()` returns None rather than raising DoesNotExist, so both
        # a missing assembly and a missing karyotype are checked explicitly
        # and reported as 404.
        assembly = EnsemblAssembly.objects.filter(ensembl_url=ensembl_url).prefetch_related('karyotype').first()
        if assembly is None:
            raise Http404

        karyotype = assembly.karyotype.first()
        if karyotype is None:
            raise Http404

        return Response(karyotype.karyotype)
646 647


648
class ProteinTargetsView(generics.ListAPIView):
    """API endpoint, presenting ProteinInfo, related to given rna."""
    permission_classes = ()
    authentication_classes = ()
    pagination_class = Pagination
    serializer_class = ProteinTargetsSerializer

    def get_queryset(self):
        # Raw query over related sequences of type 'target_protein' whose
        # source is "<pk>_<taxid>", joined with the protein details.
        pk = self.kwargs['pk']
        taxid = self.kwargs['taxid']

        # `pk` and `taxid` are interpolated into raw SQL below, so anything
        # other than plain alphanumerics / digits is rejected up front.
        if not re.match(r'[A-Za-z0-9]+\Z', pk) or not re.match(r'[0-9]+\Z', str(taxid)):
            raise Http404

        # we select redundant {protein_info}.protein_accession because
        # otherwise django curses about lack of primary key in raw query
        protein_info_query = '''
            SELECT
                {related_sequence}.target_accession,
                {related_sequence}.source_accession,
                {related_sequence}.source_urs_taxid,
                {related_sequence}.methods,
                {protein_info}.protein_accession,
                {protein_info}.description,
                {protein_info}.label,
                {protein_info}.synonyms
            FROM {related_sequence}
            LEFT JOIN {protein_info}
            ON {protein_info}.protein_accession = {related_sequence}.target_accession
            WHERE {related_sequence}.relationship_type = 'target_protein'
              AND {related_sequence}.source_urs_taxid = '{pk}_{taxid}'
        '''.format(
            related_sequence=RelatedSequence._meta.db_table,
            protein_info=ProteinInfo._meta.db_table,
            pk=pk,
            taxid=taxid
        )

        queryset = PaginatedRawQuerySet(protein_info_query, model=ProteinInfo)  # was: ProteinInfo.objects.raw(protein_info_query)
        return queryset
Anton Petrov's avatar
Anton Petrov committed
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707


class LncrnaTargetsView(generics.ListAPIView):
    """API endpoint, presenting lncRNAs targeted by a given rna."""
    permission_classes = ()
    authentication_classes = ()
    pagination_class = Pagination
    serializer_class = LncrnaTargetsSerializer

    def get_queryset(self):
        # Raw query over related sequences of type 'target_rna' whose source
        # is "<pk>_<taxid>", joined with the target's precomputed description
        # and protein details, ordered by target id.
        pk = self.kwargs['pk']
        taxid = self.kwargs['taxid']

        # `pk` and `taxid` are interpolated into raw SQL below, so anything
        # other than plain alphanumerics / digits is rejected up front.
        if not re.match(r'[A-Za-z0-9]+\Z', pk) or not re.match(r'[0-9]+\Z', str(taxid)):
            raise Http404

        # we select redundant {protein_info}.protein_accession because
        # otherwise django curses about lack of primary key in raw query
        # (the JOIN uses the model's table name, like the rest of the query)
        protein_info_query = '''
            SELECT
                {related_sequence}.source_accession,
                {related_sequence}.source_urs_taxid,
                {related_sequence}.methods,
                {related_sequence}.target_urs_taxid,
                {rna_precomputed}.short_description as target_rna_description,
                {related_sequence}.target_accession,
                {protein_info}.protein_accession,
                {protein_info}.description as target_ensembl_description,
                {protein_info}.label,
                {protein_info}.synonyms
            FROM {related_sequence}
            LEFT JOIN {rna_precomputed}
            ON target_urs_taxid = {rna_precomputed}.id
            LEFT JOIN {protein_info}
            ON {protein_info}.protein_accession = {related_sequence}.target_accession
            WHERE {related_sequence}.relationship_type = 'target_rna'
              AND {related_sequence}.source_urs_taxid = '{pk}_{taxid}'
            ORDER BY target_urs_taxid
        '''.format(
            rna_precomputed=RnaPrecomputed._meta.db_table,
            related_sequence=RelatedSequence._meta.db_table,
            protein_info=ProteinInfo._meta.db_table,
            pk=pk,
            taxid=taxid
        )
        queryset = PaginatedRawQuerySet(protein_info_query, model=ProteinInfo)
        return queryset
731 732


Anton Petrov's avatar
Anton Petrov committed
733 734
class LargerPagination(Pagination):
    # Pagination with 50 items per page whose response envelope carries two
    # extra keys, 'ensembl_compara_url' and 'ensembl_compara_status'.
    page_size = 50
    ensembl_compara_url = None
    # Default for the value read in get_paginated_response; without it the
    # response fails with AttributeError unless a view assigned it first.
    ensembl_compara_status = None
    # Retained for backward compatibility; not read by this class.
    compara_status = None

    def get_paginated_response(self, data):
        return Response({
            'links': {
               'next': self.get_next_link(),
               'previous': self.get_previous_link()
            },
            'count': self.page.paginator.count,
            'results': data,
            'ensembl_compara_url': self.ensembl_compara_url,
            'ensembl_compara_status': self.ensembl_compara_status,
        })
Anton Petrov's avatar
Anton Petrov committed
749 750


751 752 753 754
class EnsemblComparaAPIViewSet(generics.ListAPIView):
    """API endpoint for related sequences identified by Ensembl Compara"""
    permission_classes = (AllowAny, )
    serializer_class = EnsemblComparaSerializer
    pagination_class = LargerPagination
    # set by get_queryset(); read by get_ensembl_compara_url()
    ensembl_transcript_id = ''

    def get_queryset(self):
        """
        Return the other members of the homology group that "<pk>_<taxid>"
        belongs to, ordered by description, or an empty list when the
        sequence has no Compara entry.

        Also records the sequence's Ensembl transcript id on the view.
        """
        upi = self.kwargs['pk']
        taxid = self.kwargs['taxid']
        self_urs_taxid = upi + '_' + taxid
        urs_taxid = EnsemblCompara.objects.filter(urs_taxid__id=self_urs_taxid).first()
        if urs_taxid:
            self.ensembl_transcript_id = urs_taxid.ensembl_transcript_id
            return EnsemblCompara.objects.filter(homology_id=urs_taxid.homology_id)\
                                         .exclude(urs_taxid=self_urs_taxid)\
                                         .order_by('urs_taxid__description')\
                                         .all()
        else:
            return []

    def list(self, request, *args, **kwargs):
        # get_queryset() must run first: it stores the transcript id that
        # get_ensembl_compara_url() relies on.
        queryset = self.get_queryset()

        compara_url = self.get_ensembl_compara_url()
        compara_status = self.get_ensembl_compara_status()

        # The extra values go on this request's paginator instance. Writing
        # them to `self.pagination_class` would share them between
        # concurrent requests.
        paginator = self.paginator
        if paginator is not None:
            paginator.ensembl_compara_url = compara_url
            paginator.ensembl_compara_status = compara_status

        page = self.paginate_queryset(queryset)

        if page is not None:
            serializer = self.get_serializer(page, many=True)
            return self.get_paginated_response(serializer.data)

        serializer = self.get_serializer(queryset, many=True)

        return Response({'data': serializer.data})

    def get_ensembl_compara_url(self):
        """
        Return the Ensembl Compara gene tree URL for the sequence, or None
        when it has no genome region or no Ensembl transcript id.
        """
        urs_taxid = self.kwargs['pk'] + '_' + self.kwargs['taxid']
        genome_region = SequenceRegion.objects.filter(urs_taxid__id=urs_taxid).first()
        if genome_region and self.ensembl_transcript_id:
            return 'http://www.ensembl.org/' + genome_region.assembly.ensembl_url + '/Gene/Compara_Tree?t=' + self.ensembl_transcript_id
        else:
            return None

    def get_ensembl_compara_status(self):
        """
        Return a short status string describing the Compara analysis.

        Raises Http404 when there is no precomputed entry for
        "<pk>_<taxid>" (previously an unhandled DoesNotExist).
        """
        urs_taxid = self.kwargs['pk'] + '_' + self.kwargs['taxid']

        rna_precomputed = get_object_or_404(RnaPrecomputed, id=urs_taxid)
        if rna_precomputed.databases and 'Ensembl' not in rna_precomputed.databases:
            return 'analysis not available'

        compara = EnsemblCompara.objects.filter(urs_taxid=urs_taxid).first()
        if not compara:
            return 'RNA type not supported'

        compara_count = EnsemblCompara.objects.filter(homology_id=compara.homology_id).count()
        if compara_count == 0:
            return 'RNA type not supported'

        # a group of one contains only the sequence itself
        if compara_count == 1:
            return 'not found'

        return 'found'