Unverified Commit d4962867 authored by Blake Sweeney's avatar Blake Sweeney Committed by GitHub
Browse files

Merge pull request #551 from RNAcentral/update-qa-information

Update qa information
parents d7053155 0592a0d3
......@@ -21,7 +21,7 @@ from rest_framework import serializers
from portal.models import Rna, Xref, Reference_map, ChemicalComponent, DatabaseStats, Accession, Reference, \
Modification, RfamHit, RfamModel, RfamClan, OntologyTerm, SequenceFeature, EnsemblAssembly, EnsemblKaryotype, \
ProteinInfo, EnsemblCompara, RnaPrecomputed, SecondaryStructureWithLayout
ProteinInfo, EnsemblCompara, RnaPrecomputed, SecondaryStructureWithLayout, QcStatus
class RawPublicationSerializer(serializers.ModelSerializer):
......@@ -443,17 +443,10 @@ class RfamModelSerializer(serializers.ModelSerializer):
class RfamHitSerializer(serializers.ModelSerializer):
rfam_model = RfamModelSerializer()
rfam_status = serializers.SerializerMethodField()
class Meta:
model = RfamHit
fields = ('sequence_start', 'sequence_stop', 'sequence_completeness', 'rfam_model', 'rfam_status')
def get_rfam_status(self, obj):
if 'taxid' in self.context:
return json.loads(obj.upi.get_rfam_status(self.context['taxid']).as_json())
else:
return json.loads(obj.upi.get_rfam_status().as_json())
fields = ('sequence_start', 'sequence_stop', 'sequence_completeness', 'rfam_model')
class SequenceFeatureSerializer(serializers.ModelSerializer):
......@@ -520,3 +513,9 @@ class RnaPrecomputedJsonSerializer(serializers.ModelSerializer):
def get_databases(self, obj):
return [database for database in obj.databases.split(',')] if obj.databases else []
class QcStatusSerializer(serializers.ModelSerializer):
    """Serialize a QcStatus row, exposing every field declared on the model."""

    class Meta:
        model = QcStatus
        # '__all__' tells the ModelSerializer to include all model fields.
        fields = '__all__'
......@@ -54,6 +54,8 @@ urlpatterns = [
url(r'^rna/(?P<pk>URS[0-9A-Fa-f]{10})/protein-targets/(?P<taxid>\d+)/?$', cache_page(CACHE_TIMEOUT)(views.ProteinTargetsView.as_view()), name='rna-protein-targets'),
# target lncRNA for RNA (species-specific)
url(r'^rna/(?P<pk>URS[0-9A-Fa-f]{10})/lncrna-targets/(?P<taxid>\d+)/?$', cache_page(CACHE_TIMEOUT)(views.LncrnaTargetsView.as_view()), name='rna-lncrna-targets'),
# Information about the qc status for a given sequence
url(r'^rna/(?P<pk>URS[0-9A-Fa-f]{10})/qc-status/(?P<taxid>\d+)/?$', cache_page(CACHE_TIMEOUT)(views.QcStatusView.as_view()), name='qc-status'),
# literature citations associated with ENA records
url(r'^accession/(?P<pk>.*?)/citations/?$', cache_page(CACHE_TIMEOUT)(views.CitationsView.as_view()), name='accession-citations'),
# view for an individual cross-reference
......
......@@ -37,11 +37,13 @@ from apiv1.serializers import RnaNestedSerializer, AccessionSerializer, Citation
RawPublicationSerializer, RnaSecondaryStructureSerializer, \
RfamHitSerializer, SequenceFeatureSerializer, \
EnsemblAssemblySerializer, ProteinTargetsSerializer, \
LncrnaTargetsSerializer, EnsemblComparaSerializer, SecondaryStructureSVGImageSerializer
LncrnaTargetsSerializer, EnsemblComparaSerializer, SecondaryStructureSVGImageSerializer, \
QcStatusSerializer
from apiv1.renderers import RnaFastaRenderer
from portal.models import Rna, RnaPrecomputed, Accession, Database, DatabaseStats, RfamHit, EnsemblAssembly,\
GoAnnotation, RelatedSequence, ProteinInfo, SequenceFeature, SequenceRegion, EnsemblCompara
GoAnnotation, RelatedSequence, ProteinInfo, SequenceFeature, SequenceRegion, EnsemblCompara,\
QcStatus
from portal.config.expert_databases import expert_dbs
from rnacentral.utils.pagination import Pagination, LargeTablePagination
......@@ -590,7 +592,7 @@ class SequenceFeaturesAPIViewSet(generics.ListAPIView):
def get_queryset(self):
upi = self.kwargs['pk']
taxid = self.kwargs['taxid']
return SequenceFeature.objects.filter(upi=upi, taxid=taxid, feature_name__in=["conserved_rna_structure", "mature_product"])
return SequenceFeature.objects.filter(upi=upi, taxid=taxid, feature_name__in=["conserved_rna_structure", "mature_product", "cpat_orf"])
class RnaGoAnnotationsView(APIView):
......@@ -722,6 +724,18 @@ class LncrnaTargetsView(generics.ListAPIView):
return queryset
class QcStatusView(APIView):
    """API endpoint showing the QC status for a sequence.

    The record is looked up by the species-specific identifier
    ``<pk>_<taxid>``. When no QC record exists for that identifier the
    endpoint answers with HTTP 404 instead of failing with an unhandled
    ``DoesNotExist`` error (HTTP 500).
    """
    permission_classes = ()
    authentication_classes = ()

    def get(self, _request, pk, taxid):
        """Return the serialized QC status for ``pk``/``taxid``.

        Args:
            _request: the incoming request (unused).
            pk: the URS identifier captured from the URL.
            taxid: the taxonomy id captured from the URL.

        Returns:
            A ``Response`` wrapping the serialized ``QcStatus`` record.

        Raises:
            Http404: if there is no QC record for ``<pk>_<taxid>``.
        """
        # Imported locally so this block does not depend on edits to the
        # module-level import list.
        from django.shortcuts import get_object_or_404

        urs_taxid = f'{pk}_{taxid}'
        # A missing row is an expected condition (not every sequence has
        # QC data), so surface it as a 404 rather than a server error.
        status = get_object_or_404(QcStatus, id=urs_taxid)
        serializer = QcStatusSerializer(status)
        return Response(serializer.data)
class LargerPagination(Pagination):
page_size = 50
ensembl_compara_url = None
......
......@@ -92,7 +92,7 @@ class Command(BaseCommand):
'help-gene-ontology-annotations',
'help-genomic-mapping',
'help-public-database',
'help-rfam-annotations',
'help-qc',
'help-rna-target-interactions',
'help-scientific-advisory-board',
'help-secondary-structure',
......
......@@ -34,3 +34,4 @@ from .sequence_regions import *
from .sequence_exons import *
from .taxonomy import *
from .ensembl_compara import *
from .qc_status import *
"""
Copyright [2009-2021] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from django.db import models
from django.contrib.postgres.fields import JSONField
class QcStatus(models.Model):
    """Quality-control flags and messages for one precomputed sequence.

    Rows live in the ``qa_status`` table. Each row is keyed by a reference
    to ``RnaPrecomputed`` (column ``rna_id``) and also points at the ``Rna``
    entry through the ``upi`` column.
    """

    # Primary key; references RnaPrecomputed.id and is stored in 'rna_id'.
    # NOTE(review): the API view looks rows up with a '<upi>_<taxid>' string,
    # so RnaPrecomputed ids presumably have that form -- confirm.
    id = models.ForeignKey(
        'RnaPrecomputed',
        primary_key=True,
        db_column='rna_id',
        to_field='id',
        related_name='qc_status',
        on_delete=models.CASCADE,
    )
    # The sequence (Rna.upi) this status belongs to.
    upi = models.ForeignKey(
        'Rna',
        db_column='upi',
        to_field='upi',
        related_name='qc_statuses',
        on_delete=models.CASCADE,
    )
    taxid = models.IntegerField()

    # Summary flag; the qc-status template switches on it to decide whether
    # to list the messages or show "No issues found".
    has_issue = models.BooleanField()
    # Individual QC flags, one boolean column per check.
    incomplete_sequence = models.BooleanField()
    possible_contamination = models.BooleanField()
    missing_rfam_match = models.BooleanField()
    from_repetitive_region = models.BooleanField()
    possible_orf = models.BooleanField()
    # JSON payload; the qc-status template iterates over it and renders each
    # entry as HTML -- presumably a list of message strings, TODO confirm.
    messages = JSONField()

    class Meta:
        db_table = 'qa_status'
......@@ -128,7 +128,7 @@ class DomainProblem(object):
match_domain=model.domain,
model_url=model.url,
model_name=model.short_name,
help_url=reverse('help-rfam-annotations'),
help_url=reverse('help-qc'),
)
)
......
......@@ -52,6 +52,7 @@ angular.module("routes", []).service('routes', ['$interpolate', function($interp
apiGoTermsView: '/api/v1/rna/{{ upi }}/go-annotations/{{ taxid }}',
quickGoSummaryPage: 'https://www.ebi.ac.uk/QuickGO/term/{{ term_id }}',
quickGoChart: 'https://www.ebi.ac.uk/QuickGO/services/ontology/{{ ontology }}/terms/{{ term_ids }}/chart?base64=true',
qcStatusApi: '/api/v1/rna/{{ upi }}/qc-status/{{ taxid }}',
};
......
......@@ -47,7 +47,7 @@ var sequenceSearchController = function($scope, $http, $timeout, $location, $q,
};
$scope.help = {
rfam: "/help/rfam-annotations",
rfam: "/help/qc",
crs: "/help/conserved-motifs",
go: "/help/gene-ontology-annotations",
genomeMapping: "/help/genomic-mapping"
......
......@@ -13,7 +13,7 @@ var crs = {
ctrl.distinctFeatures = [];
// aggregate features with same id and different locations
ctrl.features.forEach(function(feature) {
ctrl.features.conserved_rna_structure.features.forEach(function(feature) {
var featureClone = ctrl.distinctFeatures.find(function(el) { return el.metadata.crs_id === feature.metadata.crs_id });
if (!featureClone) {
featureClone = JSON.parse(JSON.stringify(feature));
......@@ -38,4 +38,4 @@ var crs = {
templateUrl: '/static/js/components/sequence/crs/crs.html'
};
angular.module("rnaSequence").component("crs", crs);
\ No newline at end of file
angular.module("rnaSequence").component("crs", crs);
// Component that renders the QC status panel for a sequence; the markup
// lives in qc-status.html.
angular.module("rnaSequence").component("qcStatus", {
    bindings: {
        // One-way ('<') input. The template treats the string values
        // "pending" and "failed" specially and otherwise reads
        // `has_issue` and `messages` from the bound object.
        qcStatus: '<',
    },
    // NOTE(review): $http, $interpolate and routes are injected but not
    // used in this controller body.
    controller: ['$http', '$interpolate', 'routes', function($http, $interpolate, routes) {
        var ctrl = this;
    }],
    templateUrl: '/static/js/components/sequence/qc-status/qc-status.html'
});
<div>
<h2 class="margin-bottom-0px">
QC Status
</h2>
<div ng-switch="$ctrl.qcStatus">
<span ng-switch-when="pending">
<i class="fa fa-spinner fa-spin fa-2x"></i>
Loading QC information...
</span>
<span ng-switch-when="failed">
<p>Failed to fetch QC data</p>
</span>
<div ng-switch-default>
<div ng-switch="$ctrl.qcStatus.has_issue">
<ul ng-switch-when="true" class="list-inline" style="margin-top: 5px; margin-bottom: 1.5em;">
<li ng-repeat="problem in $ctrl.qcStatus.messages">
<div class="alert alert-warning" style="margin-bottom: 0px">
<i class="fa fa-warning fa-2x pull-left text-danger" aria-hidden="true"></i>
<span>
<span ng-bind-html="problem"></span>
</span>
</div>
</li>
</ul>
<span ng-switch-when="false"><p>No issues found</p></span>
</div>
</div>
</div>
</div>
......@@ -10,7 +10,7 @@ var rfam = {
var ctrl = this;
ctrl.$onInit = function() {
ctrl.help = "/help/rfam-annotations";
ctrl.help = "/help/qc";
// group hits with same rfam_model_id
ctrl.groupedHits = [];
......
......@@ -16,17 +16,6 @@
</small>
</h2>
<ul ng-if="$ctrl.rfamHits.length > 0 && $ctrl.rfamHits[0].rfam_status.has_issue" class="list-inline" style="margin-top: 5px; margin-bottom: 1.5em;">
<li ng-repeat="problem in $ctrl.rfamHits[0].rfam_status.problems">
<div class="alert alert-warning" style="margin-bottom: 0px">
<i class="fa fa-warning fa-2x pull-left text-danger" aria-hidden="true"></i>
<span>
<span ng-bind-html="problem.message"></span>
</span>
</div>
</li>
</ul>
<ul ng-if="$ctrl.groupedHits.length > 0" class="media-list">
<li ng-repeat="hit in $ctrl.groupedHits" ng-class="{ 'col-md-10 media': $ctrl.groupedHits.length === 1, 'col-md-6 media': $ctrl.groupedHits.length > 1 }" style="margin-bottom: 1.5em; padding-left: 5px;">
......
<div class="qc-warning-tooltip">
<span ng-repeat="warning in rna.fields.qc_warning | plaintext">
<p ng-if="warning === 'possible_contamination'">This Eukaryotic sequence matches a Bacterial Rfam family</p>
<p ng-if="warning === 'possible_orf'">This is a new message</p>
<p ng-if="warning === 'possible_orf'">This sequence contains a possible ORF</p>
<p ng-if="warning === 'incomplete_sequence'">This sequence matches only part of Rfam model</p>
<p ng-if="warning === 'missing_match'">This sequence does not match the expected Rfam family</p>
</span>
<p>
<a href="/help/rfam-annotations" style="color: #fff">Learn more &rarr;</a>
<a href="/help/qc" style="color: #fff">Learn more &rarr;</a>
</p>
</div>
......@@ -225,6 +225,7 @@ limitations under the License.
<script src="{% static "js/components/sequence/protein-targets/protein-targets.component.js" %}"></script>
<script src="{% static "js/components/sequence/lncrna-targets/lncrna-targets.component.js" %}"></script>
<script src="{% static "js/components/sequence/rfam/rfam.component.js" %}"></script>
<script src="{% static "js/components/sequence/qc-status/qc-status.component.js" %}"></script>
<script src="{% static "js/components/sequence-search/nhmmer.sequence.search.js" %}"></script>
<script src="{% static "js/components/sequence-search/sequence-search.module.js" %}"></script>
......
## What is Rfam?
[Rfam](http://rfam.org) is a database of functional non-coding RNA families represented by multiple sequence alignments and consensus secondary structures. The sequence and structural information is used to build [Infernal](http://eddylab.org/infernal/) covariance models, which can be used to find new instances of RNA families and annotate genomes with non-coding RNAs.
RNAcentral provides a variety of quality checks for all sequences. Many of these checks are based on [Rfam](http://rfam.org). [Rfam](http://rfam.org) is a database of functional non-coding RNA families represented by multiple sequence alignments and consensus secondary structures. The sequence and structural information is used to build [Infernal](http://eddylab.org/infernal/) covariance models, which can be used to find new instances of RNA families and annotate genomes with non-coding RNAs.
Every release, RNAcentral annotates **all sequences** with Rfam models. Rfam classification provides additional context to sequences with few annotations and helps identify **potential problems**, for example, sequences which are likely contamination.
## Three types of quality control using Rfam
In addition to Rfam based checks we also use [CPAT](https://academic.oup.com/nar/article/41/6/e74/2902455) to analyze sequences. This tool detects possible open reading frames in sequences. We analyze **all** human, fly, mouse and zebrafish sequences this way. We use CPAT version 3.0.4 with the default options.
## Current types of quality control
### 1. Incomplete sequences
......@@ -43,6 +43,17 @@ The majority of RNAcentral sequences annotated as rRNA or tRNA match the corresp
Browse all sequences with missing matches by searching for [`qc_warning:"missing_rfam_match"`](/search?q=qc_warning:%22missing_rfam_match%22).
### 4. Possible ORFs
Very few sequences contain open reading frames; however, it is worth noting which ones do, as this may be relevant to the function of the ncRNA.
#### Examples
* [Human lncRNA](/rna/URS00008D8914/9606)
* [Fly rRNA](/rna/URS0000745350/7227)
Browse all sequences with possible ORFs by searching for [`qc_warning:"possible_orf"`](/search?q=qc_warning:%22possible_orf%22).
## Why some sequences do not match any Rfam families
There are several possible reasons:
......
......@@ -15,12 +15,12 @@ limitations under the License.
{% block meta_tags %}
{{ block.super }}
<meta name="description" content="Using Rfam annotations in RNAcentral"/>
<meta name="twitter:description" content="Using Rfam annotations in RNAcentral"/>
<meta name="description" content="Using QC checks in RNAcentral"/>
<meta name="twitter:description" content="Using QC checks in RNAcentral"/>
{% endblock %}
{% block title %}
Help: Rfam annotations
Help: QC checks
{% endblock %}
{% block content %}
......@@ -28,16 +28,16 @@ Help: Rfam annotations
<div class="row">
<div class="col-md-8" role="main">
<h1><i class="fa fa-map-marker"></i> Rfam annotations</h1>
<h1><i class="fa fa-map-marker"></i> Quality Control</h1>
{% load markdown_deux_tags %}
{% markdown %}
{% include 'portal/docs/rfam-annotations.md' %}
{% include 'portal/docs/qc.md' %}
{% endmarkdown %}
<hr>
<a class="btn btn-default btn-sm" title="Edit on GitHub" href="https://github.com/RNAcentral/rnacentral-webcode/edit/master/rnacentral/portal/templates/portal/docs/rfam-annotations.md" target="_blank"><i class="fa fa-pencil-square-o"></i> Improve this page</a>
<a class="btn btn-default btn-sm" title="Edit on GitHub" href="https://github.com/RNAcentral/rnacentral-webcode/edit/master/rnacentral/portal/templates/portal/docs/qc.md" target="_blank"><i class="fa fa-pencil-square-o"></i> Improve this page</a>
</div>
<div class="col-md-4">
......
......@@ -19,7 +19,7 @@
<strong>Functional annotations</strong>
<li><a href="{% url 'help-genomic-mapping' %}">Genomic mapping</a></li>
<li><a href="{% url 'help-secondary-structure' %}">Secondary structure <span class="label label-success">New</span></a></li>
<li><a href="{% url 'help-rfam-annotations' %}">Rfam annotations</a></li>
<li><a href="{% url 'help-qc' %}">QC annotations</a></li>
<li><a href="{% url 'help-gene-ontology-annotations' %}">Gene Ontology annotations</a></li>
<li><a href="{% url 'help-rna-target-interactions' %}">RNA-target interactions</a></li>
<li><a href="{% url 'help-conserved-motifs' %}">Conserved RNA motifs</a></li>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment