Unverified Commit fabb2d94 authored by Anton Petrov's avatar Anton Petrov Committed by GitHub
Browse files

Merge pull request #511 from RNAcentral/release-16

Release 16
parents be1ea3aa 97d2d825
......@@ -12,6 +12,8 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import re
import requests
import zlib
from itertools import chain
from django.http import Http404, HttpResponse
......@@ -362,21 +364,45 @@ class SecondaryStructureSVGImage(generics.ListAPIView):
permission_classes = (AllowAny,)
def get(self, request, pk=None, format=None):
ftp = "http://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/.secondary-structure/secondary-structure/{}.svg.gz"
upi = list(self.kwargs['pk'])
upi_path = "".join(upi[0:3]) + "/" \
+ "".join(upi[3:5]) + "/" \
+ "".join(upi[5:7]) + "/" \
+ "".join(upi[7:9]) + "/" \
+ "".join(upi[9:11]) + "/"
url = ftp.format(upi_path + "".join(upi))
try:
response = requests.get(url)
response.raise_for_status()
svg_ftp = zlib.decompress(response.content, zlib.MAX_WBITS | 32)
except requests.exceptions.HTTPError as e:
svg_ftp = None
try:
upi = self.kwargs['pk']
image = SecondaryStructureWithLayout.objects.get(urs=upi)
svg_bd = SecondaryStructureWithLayout.objects.get(urs="".join(upi))
svg_bd = svg_bd.layout
except SecondaryStructureWithLayout.DoesNotExist:
svg_bd = None
if not svg_ftp and not svg_bd:
return Response(status=status.HTTP_404_NOT_FOUND)
return HttpResponse(self.generate_thumbnail(image.layout, upi), content_type='image/svg+xml')
return HttpResponse(
self.generate_thumbnail(svg_ftp if svg_ftp else svg_bd, "".join(upi)), content_type='image/svg+xml'
)
def generate_thumbnail(self, image, upi):
move_to_start_position = None
color = ColorHash(upi).hex
points = []
width = []
height = []
for i, line in enumerate(image.split('\n')):
if i == 0:
if not width:
width = re.findall(r'width="(\d+(\.\d+)?)"', line)
if not height:
height = re.findall(r'height="(\d+(\.\d+)?)"', line)
for nt in re.finditer('<text x="(\d+)(\.\d+)?" y="(\d+)(\.\d+)?".*?</text>', line):
if 'numbering-label' in nt.group(0):
......
......@@ -91,7 +91,7 @@ expert_dbs = [
],
'imported': True,
'status': 'updated',
'version': 'as of December 10th, 2019',
'version': 'as of July 20th, 2020',
},
{
'name': 'FlyBase',
......@@ -116,7 +116,7 @@ expert_dbs = [
],
'imported': True,
'status': 'updated',
'version': 'FB2020_02',
'version': 'FB2020_03',
},
{
'name': 'Ensembl',
......@@ -147,7 +147,7 @@ expert_dbs = [
],
'imported': True,
'status': 'updated',
'version': '98',
'version': '100',
},
{
'name': 'Ensembl Plants',
......@@ -296,8 +296,8 @@ expert_dbs = [
},
],
'imported': True,
'status': '',
'version': '14.1',
'status': 'updated',
'version': '14.2',
},
{
'name': 'miRBase',
......@@ -327,7 +327,7 @@ expert_dbs = [
}
],
'imported': True,
'status': 'updated',
'status': '',
'version': '22.1',
},
{
......@@ -502,7 +502,7 @@ expert_dbs = [
],
'imported': True,
'status': 'updated',
'version': '97', # ftp://ftp.ncbi.nlm.nih.gov/refseq/release/RELEASE_NUMBER
'version': '201', # ftp://ftp.ncbi.nlm.nih.gov/refseq/release/RELEASE_NUMBER
},
{
'name': 'RDP',
......@@ -530,17 +530,28 @@ expert_dbs = [
'version': '',
},
{
'name': 'CRW Site',
'label': '',
'url': 'http://www.rna.ccbb.utexas.edu/',
'description': 'comparative sequence and structure information for ribosomal, intron, and other RNAs',
'hint': 'CRW Site comparative sequence and structure information for ribosomal, intron, and other RNAs',
'tags': ['curated', 'rRNA'],
'name': 'CRW',
'label': 'crw',
'url': 'http://crw-site.chemistry.gatech.edu/',
'description': 'provides comparative sequence and structure information for ribosomal, intron, and other RNAs',
'hint': 'CRW provides comparative sequence and structure information for ribosomal, intron, and other RNAs',
'tags': ['curated', 'SSU rRNA', '5S rRNA'],
'abbreviation': 'Comparative RNA Website',
'examples': '',
'references': [],
'imported': False,
'status': '',
'examples': [
{'upi': 'URS0001BCA6C0', 'taxid': 562}, # E.coli SSU
{'upi': 'URS0001BCA4A9', 'taxid': 9606}, # Human SSU
{'upi': 'URS0001BCA572', 'taxid': 9606}, # Human 5S
],
'references': [
{
'title': 'The comparative RNA web (CRW) site: an online database of comparative sequence and structure information for ribosomal, intron, and other RNAs',
'authors': 'Jamie J Cannone, Sankar Subramanian, Murray N Schnare, James R Collett, Lisa M DSouza, Yushi Du, Brian Feng, Nan Lin, Lakshmi V Madabusi, Kirsten M Muller, Nupur Pande, Zhidi Shang, Nan Yu, Robin R Gutell',
'journal': 'BMC Bioinformatics. 2002;3:2',
'pubmed_id': '11869452',
},
],
'imported': True,
'status': 'new',
'version': '',
},
{
......@@ -807,8 +818,8 @@ expert_dbs = [
},
],
'imported': True,
'status': '',
'version': '122',
'status': 'updated',
'version': 'r138',
},
{
'name': 'SGD',
......@@ -862,16 +873,51 @@ expert_dbs = [
},
{
'name': 'snoRNA Database',
'label': '',
'label': 'snorna_database',
'url': 'http://lowelab.ucsc.edu/snoRNAdb/',
'description': 'predicted snoRNA genes',
'hint': 'snoRNA database contains predicted snoRNA genes',
'description': 'is a curated collection of archaeal snoRNAs maintained by the Lowe Lab at UC Santa Cruz',
'hint': 'The snoRNA Database is a curated collection of archaeal snoRNAs maintained by the Lowe Lab at UC Santa Cruz',
'tags': ['automatic', 'curated', 'snoRNA'],
'abbreviation': '',
'examples': '',
'references': [],
'imported': False,
'status': '',
'examples': [
{'upi': 'URS0000600702', 'taxid': 340102},
{'upi': 'URS000020B9CF', 'taxid': 698757},
{'upi': 'URS00000A48A9', 'taxid': 698757},
],
'references': [
{
'title': 'Homologs of small nucleolar RNAs in Archaea',
'authors': 'A D Omer, T M Lowe, A G Russell, H Ebhardt, S R Eddy, P P Dennis',
'journal': 'Science. 2000 Apr 21;288(5465):517-22',
'pubmed_id': '10775111',
},
{
'title': 'Archaeal homologs of eukaryotic methylation guide small nucleolar RNAs: lessons from the Pyrococcus genomes',
'authors': 'C Gaspin, J Cavaille, G Erauso, J P Bachellerie',
'journal': 'J Mol Biol. 2000 Apr 7;297(4):895-906',
'pubmed_id': '10736225',
},
{
'title': 'Methylation guide RNA evolution in archaea: structure, function and genomic organization of 110 C/D box sRNA families across six Pyrobaculum species',
'authors': 'Lauren M Lui, Andrew V Uzilov, David L Bernick, Andrea Corredor, Todd M Lowe, Patrick P Dennis',
'journal': 'Nucleic Acids Res. 2018 Jun 20;46(11):5678-5691',
'pubmed_id': '29771354',
},
{
'title': 'Diversity of Antisense and Other Non-Coding RNAs in Archaea Revealed by Comparative Small RNA Sequencing in Four Pyrobaculum Species',
'authors': 'David L Bernick, Patrick P Dennis, Lauren M Lui, Todd M Lowe',
'journal': 'Front Microbiol. 2012 Jul 2;3:231',
'pubmed_id': '22783241',
},
{
'title': 'Complete genome sequence of Pyrobaculum oguniense',
'authors': 'David L Bernick, Kevin Karplus, Lauren M Lui, Joanna K C Coker, Julie N Murphy, Patricia P Chan, Aaron E Cozen, Todd M Lowe',
'journal': 'Stand Genomic Sci. 2012 Jul 30;6(3):336-45',
'pubmed_id': '23407329',
},
],
'imported': True,
'status': 'new',
'version': '',
},
{
......@@ -1088,7 +1134,7 @@ expert_dbs = [
},
],
'imported': True,
'status': 'new',
'status': '',
'version': '17',
},
{
......@@ -1159,7 +1205,7 @@ expert_dbs = [
},
],
'imported': True,
'status': '',
'status': 'updated',
'version': '',
},
{
......@@ -1184,7 +1230,7 @@ expert_dbs = [
},
],
'imported': True,
'status': 'new',
'status': '',
'version': '1.1.0',
},
{
......@@ -1209,15 +1255,15 @@ expert_dbs = [
},
],
'imported': True,
'status': 'new',
'status': '',
'version': '2.0',
},
{
'name': 'MalaCards',
'label': 'malacards',
'url': 'https://www.malacards.org/',
'description': 'is an integrated database of human diseases and their annotations',
'hint': 'MalaCards is an integrated database of human diseases and their annotations',
'description': 'integrates manually-curated and text-mining sources to associate genes, including ncRNAs, with diseases, and lists the supporting evidence',
'hint': 'MalaCards integrates manually-curated and text-mining sources to associate genes, including ncRNAs, with diseases, and lists the supporting evidence',
'tags': ['disease', 'human'],
'abbreviation': '',
'examples': [
......@@ -1234,7 +1280,7 @@ expert_dbs = [
},
],
'imported': True,
'status': 'new',
'status': '',
'version': '4.12',
},
{
......@@ -1259,8 +1305,8 @@ expert_dbs = [
},
],
'imported': True,
'status': 'new',
'version': '4.12',
'status': '',
'version': '4.14',
},
{
'name': 'CRS',
......@@ -1283,8 +1329,8 @@ expert_dbs = [
},
],
'imported': True,
'status': '',
'version': '2.0',
'status': 'updated',
'version': '2.1',
},
{
'name': 'IntAct',
......@@ -1308,7 +1354,46 @@ expert_dbs = [
},
],
'imported': True,
'status': '',
'version': '',
},
{
'name': 'ZFIN',
'label': 'zfin',
'url': 'https://zfin.org',
'description': 'is the database of genetic and genomic data for the zebrafish (Danio rerio) as a model organism',
'hint': 'The Zebrafish Information Network (ZFIN) is the database of genetic and genomic data for the zebrafish (Danio rerio) as a model organism',
'tags': ['curated', 'model organism', 'zebrafish'],
'abbreviation': 'The Zebrafish Information Network',
'examples': [
{'upi': 'URS00003B6A21', 'taxid': 7955}, # mir196c
{'upi': 'URS00008E3972', 'taxid': 7955}, # linc.alien
{'upi': 'URS0000A8261D', 'taxid': 7955}, # dre-let-7a-1
],
'references': [
{
'title': 'The Zebrafish Information Network: new support for non-coding genes, richer Gene Ontology annotations and the Alliance of Genome Resources',
'authors': 'Leyla Ruzicka, Douglas G Howe, Sridhar Ramachandran, Sabrina Toro, Ceri E Van Slyke, Yvonne M Bradford, Anne Eagle, David Fashena, Ken Frazer, Patrick Kalita, Prita Mani, Ryan Martin, Sierra Taylor Moxon, Holly Paddock, Christian Pich, Kevin Schaper, Xiang Shao, Amy Singer, Monte Westerfield',
'journal': 'Nucleic Acids Res. 2019 Jan 8;47(D1):D867-D873',
'pubmed_id': '30407545',
},
],
'imported': True,
'status': 'new',
'version': '',
},
{
'name': 'snoRNA Atlas',
'label': 'snoatlas',
'url': 'http://snoatlas.bioinf.uni-leipzig.de/',
'description': '',
'hint': 'snoRNA Atlas is a database of human snoRNAs',
'tags': ['', '', ''],
'abbreviation': '',
'examples': [],
'references': [],
'imported': False,
'status': '',
'version': '',
},
]
......@@ -20,9 +20,9 @@ examples = [
'description': 'Lysine riboswitch RNA from Thermotoga maritima',
},
{
'uid': 'URS0000A5F9D7',
'taxid': '511145',
'description': 'E. coli Moco riboswitch',
'uid': 'URS000080E29A',
'taxid': '32630',
'description': 'ykoK riboswitch from Bacillus subtilis',
},
{
'uid': 'URS0000D6A50B',
......
......@@ -216,7 +216,7 @@ class Accession(models.Model):
'MALACARDS': 'https://www.genecards.org/cgi-bin/carddisp.pl?gene={id}#diseases',
'GENECARDS': 'https://www.genecards.org/cgi-bin/carddisp.pl?gene={id}',
}
if self.database in ['GTRNADB', 'ZWD', 'SNODB', 'MIRGENEDB', '5SRRNADB']:
if self.database in ['GTRNADB', 'ZWD', 'SNODB', 'MIRGENEDB', '5SRRNADB', 'SILVA', 'SNORNADB', 'ZFIN']:
try:
data = json.loads(self.note)
url = data['url'] if 'url' in data else ''
......@@ -232,9 +232,6 @@ class Accession(models.Model):
elif self.database == 'VEGA':
return urls[self.database].format(id=self.optional_id,
species=self.species.replace(' ', '_'))
elif self.database == 'SILVA':
return urls[self.database].format(id=self.optional_id,
lsu_ssu='ssu' if 'small' in self.product else 'lsu')
elif self.database == 'GREENGENES':
return urls[self.database].format(id=self.parent_ac, version=self.seq_version)
elif self.database == 'REFSEQ':
......
......@@ -16,6 +16,7 @@ import operator as op
import itertools as it
from collections import Counter, defaultdict
import re
import zlib
from caching.base import CachingMixin, CachingManager
from django.conf import settings
......@@ -541,6 +542,23 @@ class Rna(CachingMixin, models.Model):
if not layout:
return {}
# added for release-16. Layout comes from the FTP
ftp = "http://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/.secondary-structure/secondary-structure/{}.svg.gz"
upi = list(self.pk)
upi_path = "".join(upi[0:3]) + "/" \
+ "".join(upi[3:5]) + "/" \
+ "".join(upi[5:7]) + "/" \
+ "".join(upi[7:9]) + "/" \
+ "".join(upi[9:11]) + "/"
url = ftp.format(upi_path + "".join(upi))
try:
response = requests.get(url)
response.raise_for_status()
svg = zlib.decompress(response.content, zlib.MAX_WBITS | 32)
except requests.exceptions.HTTPError as e:
svg = None
# model_name = layout.template.model_name
# if model_name.count('.') >= 2:
# template_source = 'CRW'
......@@ -557,7 +575,7 @@ class Rna(CachingMixin, models.Model):
'secondary_structure': layout.secondary_structure,
'source': layout.template.model_source,
'model_id': layout.template.model_name,
'layout': layout.layout,
'layout': svg if svg else layout.layout,
'template_species': layout.template.taxid.name,
'template_lineage': layout.template.taxid.lineage,
}
......
......@@ -18,7 +18,6 @@ if the sequence is only a partial sequence
import requests
class RnaSummary(object):
"""
This objects retrieves the information required for generating an automated
......@@ -36,27 +35,30 @@ class RnaSummary(object):
if len(raw_data['entries']) == 0:
self.entry_found = False
return
entry = raw_data['entries'][0]['fields']
self.entry_found = True
self.citations_count = raw_data['entries'][0]['fields']['n_citations'][0]
self.common_name = raw_data['entries'][0]['fields']['common_name'][0] if raw_data['entries'][0]['fields']['common_name'] else ''
self.databases = raw_data['entries'][0]['fields']['expert_db']
self.citations_count = entry['n_citations'][0]
self.common_name = entry['common_name'][0] if entry['common_name'] else ''
self.databases = entry['expert_db']
self.database_count = len(self.databases)
self.description = raw_data['entries'][0]['fields']['description'][0]
self.genes = raw_data['entries'][0]['fields']['gene']
self.has_genomic_coordinates = raw_data['entries'][0]['fields']['has_genomic_coordinates'][0]
self.has_go_annotations = raw_data['entries'][0]['fields']['has_go_annotations'][0]
self.has_interacting_proteins = raw_data['entries'][0]['fields']['has_interacting_proteins'][0]
self.has_interacting_rnas = raw_data['entries'][0]['fields']['has_interacting_rnas'][0] if len(raw_data['entries'][0]['fields']['has_interacting_rnas']) > 0 else None
self.has_secondary_structure = raw_data['entries'][0]['fields']['has_secondary_structure'][0]
self.interacting_proteins = raw_data['entries'][0]['fields']['interacting_protein']
self.interacting_rnas = raw_data['entries'][0]['fields']['interacting_rna']
self.length = raw_data['entries'][0]['fields']['length'][0]
self.product = raw_data['entries'][0]['fields']['interacting_protein']
self.rfam_family_name = raw_data['entries'][0]['fields']['rfam_family_name']
self.rfam_id = raw_data['entries'][0]['fields']['rfam_id']
self.description = entry['description'][0] if len(entry['description']) > 0 else ''
self.genes = entry['gene']
self.has_genomic_coordinates = entry['has_genomic_coordinates'][0]
self.has_go_annotations = entry['has_go_annotations'][0]
self.has_interacting_proteins = entry['has_interacting_proteins'][0]
self.has_interacting_rnas = entry['has_interacting_rnas'][0] if len(entry['has_interacting_rnas']) > 0 else None
self.has_secondary_structure = entry['has_secondary_structure'][0]
self.interacting_proteins = entry['interacting_protein']
self.interacting_rnas = entry['interacting_rna']
self.length = entry['length'][0]
self.product = entry['interacting_protein']
self.rfam_family_name = entry['rfam_family_name']
self.rfam_id = entry['rfam_id']
self.rfam_count = len(self.rfam_id)
self.rna_type = raw_data['entries'][0]['fields']['rna_type'][0]
self.species = raw_data['entries'][0]['fields']['species'][0] if len(raw_data['entries'][0]['fields']['species']) > 0 else ''
self.rna_type = entry['rna_type'][0]
self.species = entry['species'][0] if len(entry['species']) > 0 else ''
self.pretty_so_rna_type = self.parse_so_rna_type(entry['so_rna_type'][0]) if len(entry['so_rna_type']) > 0 else ''
self.so_rna_type = self.convert_string_to_array(entry['so_rna_type'][0]) if len(entry['so_rna_type']) > 0 else ''
def get_raw_data(self, urs, taxid):
......@@ -79,6 +81,7 @@ class RnaSummary(object):
'rfam_id',
'rna_type',
'species',
'so_rna_type',
]
url = '{endpoint}/entry/{urs}_{taxid}?format=json&fields={fields}'.format(
urs=urs,
......@@ -90,12 +93,41 @@ class RnaSummary(object):
return data.json()
def get_species_count(self, urs):
url = '{endpoint}?query={urs}*&format=json'.format(
url = '{endpoint}?query={urs}* NOT TAXONOMY:"{taxid}"&format=json'.format(
urs=urs,
endpoint=self.endpoint
endpoint=self.endpoint,
taxid=self.taxid
)
try:
data = requests.get(url)
return data.json()['hitCount']
return max(int(data.json()['hitCount']), 1)
except:
return 1
def convert_string_to_array(self, so_rna_type):
    """
    Split a slash-separated SO term path into a list of term names.

    Empty segments (produced by leading, trailing or doubled slashes) are
    dropped. When more than one term remains, the first 'ncRNA' entry is
    removed if present; a lone 'ncRNA' is kept.

    :param so_rna_type: slash-separated string of SO term names
    :return: list of term names, possibly empty
    """
    # Filter instead of list.remove(''): remove() raises ValueError when
    # there is no empty segment and only drops the first one when the
    # string has both a leading and a trailing slash.
    so_terms = [term for term in so_rna_type.split('/') if term]

    # Only drop 'ncRNA' when it is actually there, so a multi-term path
    # without it no longer raises ValueError.
    if len(so_terms) > 1 and 'ncRNA' in so_terms:
        so_terms.remove('ncRNA')
    return so_terms
def pretty_so_terms(self, so_term):
    """
    Turn a single SO term name into its display form.

    The first character is lower-cased unless the term is one of the
    listed names that keep their capitalisation. 'lnc_RNA' and
    'pre_miRNA' get fixed spellings; every other term has its
    underscores replaced by spaces.

    :param so_term: SO term name, e.g. 'lnc_RNA'
    :return: display string; '' when so_term is empty
    """
    if not so_term:
        # so_term[0] below would raise IndexError on an empty term.
        return ''

    # Terms whose leading capital must be preserved.
    exceptions = ('RNase_P_RNA', 'SRP_RNA', 'Y_RNA', 'RNase_MRP_RNA')
    if so_term not in exceptions:
        so_term = so_term[0].lower() + so_term[1:]

    # Terms with a conventional spelling that is not "underscores to spaces".
    fixed_spellings = {
        'lnc_RNA': 'lncRNA',
        'pre_miRNA': 'pre-miRNA',
    }
    if so_term in fixed_spellings:
        return fixed_spellings[so_term]
    return so_term.replace('_', ' ')
def parse_so_rna_type(self, so_rna_type):
    """
    Build the list of display names for a slash-separated SO term path.

    The path is split with convert_string_to_array and each resulting
    term is passed through pretty_so_terms.

    :param so_rna_type: slash-separated string of SO term names
    :return: list of display strings, or '' when no terms are found
    """
    terms = self.convert_string_to_array(so_rna_type)
    if terms:
        return [self.pretty_so_terms(term) for term in terms]
    # Callers receive an empty string, not an empty list, when there is
    # nothing to show.
    return ''
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:osb="http://www.openswatchbook.org/uri/2009/osb"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
width="433.82352"
height="232.35294"
viewBox="0 0 433.8235 232.35292"
sodipodi:docname="crs_logo.svg">
<metadata
id="metadata8">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title />
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs6">
<linearGradient
id="linearGradient5617"
osb:paint="solid">
<stop
style="stop-color:#000000;stop-opacity:1;"
offset="0"
id="stop5619" />
</linearGradient>
<linearGradient
id="linearGradient5607"
osb:paint="solid">
<stop
style="stop-color:#323232;stop-opacity:1;"
offset="0"
id="stop5609" />
</linearGradient>
</defs>
<sodipodi:namedview
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1"
objecttolerance="10"
gridtolerance="10"
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="1920"