attrib_type.txt 10.9 KB
Newer Older
1 2 3
# this file is intended to hold all valid attrib_type
# table entries for all ensembl databases that we release
#
ml6's avatar
ml6 committed
4
# If you use the provided upload script, comment lines and
5 6 7 8 9
# empty lines should be automatically removed, all
# other lines should contain tab delimited database entries
# for the attrib_type table

# each attribute type should be preceded by a comment that
10 11
# describes its uses, unless its description field is deemed to be
# expressive enough
12

13
# need to document and find out about each attrib_type
14 15


16
1	embl_acc	EMBL accession
17

18 19 20 21
2	status	Status

3	synonym	Synonym

22
4	name	Name	Alternative/long name
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46

5	type	Type of feature

# A seq_region that is not represented in a more global coordinate system
# should get the toplevel attribute and value 1
# If you have more than one assembly in your database, this feature will
# not work as expected. You should then explicitly request features in a specific
# coordinate system
6	toplevel	Top Level	Top Level Non-Redundant Sequence Region

# The number of genes on each seq_region is counted and stored under this
# seq_region_attribute to be displayed on mapview. Mainly web code uses this.
7	GeneCount	Gene Count	Total Number of Genes

# Same as above for known genes
8	KnownGeneCount	Known Gene Count	Total Number of Known Genes

# same as above for pseudogenes. The criterion for a pseudogene is
# that the gene.type field matches /pseudogene/
9	PseudoGeneCount	PseudoGene Count	Total Number of PseudoGenes

# Snps on a seq_region. See above.
10	SNPCount	SNP Count	Total Number of SNPs

ml6's avatar
ml6 committed
47
# another seq_region attribute. When a seq_region should be used with a
48
# different codon table this attribute's value should contain its number.
ml6's avatar
ml6 committed
49
# This is a bioperl codon table, find out from there which number to use
50 51 52 53 54 55 56 57 58 59 60 61 62
# for your seq_region
# Useful for mitochondria and bacteria with non-standard codon tables
11	codon_table	Codon Table	Alternate codon table

# This is an attribute for a translation. Values describe start and end
# position of a selenocysteine in a Translation (Amino Acid coordinates)
# Example: "123 123 U". This is the general sequence edit format.
# Other attributes with sequence edits for different reasons will come
# up in the future
12	_selenocysteine	Selenocysteine

13	bacend	bacend

ml6's avatar
ml6 committed
63
# Contains the htg phase for clones.
64 65 66 67 68
14	htg	htg	High Throughput phase attribute

15	miRNA	Micro RNA	Coordinates of the mature miRNA

# A sequence region that you consider not part of the reference genome should
69
# be tagged as non_ref in seq_region_attrib. Chromosome 6 haplotypes in human
ml6's avatar
ml6 committed
70
# are examples of that.
71 72 73 74
16	non_ref	Non Reference	Non Reference Sequence Region

17	sanger_project	Sanger Project name

ml6's avatar
ml6 committed
75
18	clone_name	Clone name
76

ml6's avatar
ml6 committed
77
19	fish	FISH location
78

ml6's avatar
ml6 committed
79
21	org	Sequencing centre
80

ml6's avatar
ml6 committed
81
22	method	Method
82 83 84 85 86 87 88

23	superctg	Super contig id

24	inner_start	Max start value

25	inner_end	Min end value

ml6's avatar
ml6 committed
89
26	state	Current state of clone
90 91 92

27	organisation	Organisation sequencing clone

ml6's avatar
ml6 committed
93
28	seq_len	Accession length
94

ml6's avatar
ml6 committed
95
29	fp_size	FP size
96 97

30	BACend_flag	BAC end flags
Steve Trevanion's avatar
Steve Trevanion committed
98

99
# used by Vega web code to link WebFPC
Steve Trevanion's avatar
Steve Trevanion committed
100
31	fpc_clone_id	fpc clone
101 102

# additional gene counts for Vega (see GeneCount for general description)
Steve Trevanion's avatar
Steve Trevanion committed
103 104 105
32	KnwnPCCount	protein_coding_KNOWN	Number of Known Protein Coding
33	NovPCCount	protein_coding_NOVEL	Number of Novel Protein Coding
34	NovPTCount	processed_transcript_NOVEL	Number of Novel Processed Transcripts
106 107
35	PutPTCount	processed_transcript_PUTATIVE	Number of Putative Processed Transcripts
36	PredPCCount	protein_coding_PREDICTED	Number of Predicted Protein Coding
Steve Trevanion's avatar
IG case  
Steve Trevanion committed
108 109
37	NovIGGeneCount	IG_gene_NOVEL	Number of Novel IG Genes
38	NovIGPsGenCount	IG_pseudogene_NOVEL	Number of Novel IG Pseudogenes
Steve Trevanion's avatar
Steve Trevanion committed
110 111 112 113 114
39	TotPsCount	total_pseudogene	Total Number of Pseudogenes
40	KnwnProcPsCount	processed_pseudogene	Number of Known Processed Pseudogenes
41	KnwnUnPsCount	unprocessed_pseudogene	Number of Known Unprocessed Pseudogenes
42	KnwnPCProgCount	protein_coding_in_progress_KNOWN	Number of Known Protein Coding in progress
43	NovPCProgCount	protein_coding_in_progress_NOVEL	Number of Novel Protein Coding in progress
115 116

# Vega annotation stats
Steve Trevanion's avatar
Steve Trevanion committed
117 118 119
44	AnnotSeqLength	Annotated sequence length	Annotated Sequence
45	TotCloneNum	Total number of clones	Total Number of Clones
46	NumAnnotClone	Fully annotated clones	Number of Fully Annotated Clones
120 121

# Acknowledgements for manual annotation of this seq_region
122
47	ack	Acknowledgement	Acknowledgement for manual annotation
123 124

# old clone attribute
125
48	htg_phase	High throughput phase	High throughput genomic sequencing phase
126
49	description	Description	A general descriptive text attribute
127
50	chromosome	Chromosome	Chromosomal location for supercontigs that are not assembled
128 129
51	nonsense	Nonsense Mutation	Strain specific nonesense mutation

130 131 132 133
# misc Vega attribs
52	author	Author	Group resonsible for Vega annotation
53	author_email	Author email address	Author email address
54	remark	Remark	Annotation remark
Steve Trevanion's avatar
typo  
Steve Trevanion committed
134
55	transcr_class	Transcript class	Transcript class
Steve Trevanion's avatar
Steve Trevanion committed
135
56	KnwnPTCount	processed_transcript_KNOWN	Number of Known Processed Transcripts
Steve Trevanion's avatar
Steve Trevanion committed
136
57	ccds	CCDS	CCDS identifier
137

ml6's avatar
ml6 committed
138
# make first amino acid methionine
139
58	initial_met	Initial methionine	Set first amino acid to methionine
Glenn Proctor's avatar
Glenn Proctor committed
140 141

# label frameshifts modelled as short (1,2,4,5 bp) introns
142
59	Frameshift	Frameshift	Frameshift modelled as intron
Steve Trevanion's avatar
Steve Trevanion committed
143 144 145 146

#more gene counts for Vega
60	PTCount	processed_transcript_UNKNOWN	Number of Processed Transcripts
61	PredPTCount	processed_transcript_PREDICTED	Number of Predicted Processed Transcripts
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164


62	ncRNA	Structure	RNA secondary structure line

63	skip_clone	skip clone  Skip clone in align_by_clone_identity.pl

# Gene counts for seq_region_stats.pl script
64	GeneNo_knwCod	known protein_coding Gene Count	Number of known protein_coding Genes
65	GeneNo_novCod	novel protein_coding Gene Count	Number of novel protein_coding Genes
66	GeneNo_rRNA	rRNA Gene Count	Number of rRNA Genes
67	GeneNo_pseudo	pseudogene Gene Count	Number of pseudogene Genes
68	GeneNo_snRNA	snRNA Gene Count	Number of snRNA Genes
69	GeneNo_snoRNA	snoRNA Gene Count	Number of snoRNA Genes
70	GeneNo_miRNA	miRNA Gene Count	Number of miRNA Genes
71	GeneNo_mscRNA	misc_RNA Gene Count	Number of misc_RNA Genes
72	GeneNo_scRNA	scRNA Gene Count	Number of scRNA Genes
73	GeneNo_MTrRNA	Mt_rRNA Gene Count	Number of Mt_rRNA Genes
74	GeneNo_MTtRNA	Mt_tRNA Gene Count	Number of Mt_tRNA Genes
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
165
75	GeneNo_RNA_pseu	scRNA_pseudogene Gene Count	Number of scRNA_pseudogene Genes
Glenn Proctor's avatar
Glenn Proctor committed
166
76	GeneNo_tRNA	tRNA Gene Count	 Number of tRNA Genes
167

Glenn Proctor's avatar
Glenn Proctor committed
168 169
80	supercontig	SuperContig name	NULL
81	well_name	Well plate name	NULL
Ensembl Account's avatar
Ensembl Account committed
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184

# Added by fc1 26/11/06
82	bacterial	Bacterial
83	NovelCDSCount	Novel CDS Count
84	NovelTransCount	Novel Transcript Count
85	PutTransCount	Putative Transcript Count
86	PredTransCount	Predicted Transcript Count
87	UnclassPsCount	Unclass Ps count
88	KnwnprogCount	Known prog Count
89	NovCDSprogCount	Novel CDS prog count
90	bacend_well_nam	BACend well name
91	alt_well_name	Alt well name
92	TranscriptEdge	Transcript Edge
93	alt_embl_acc	Alt EMBL acc
94	alt_org	Alt org
ml6's avatar
ml6 committed
185

Damian Smedley's avatar
Damian Smedley committed
186
# anacode attribs added by ml6 29/11/06 - seen in yeast but not others
ml6's avatar
ml6 committed
187 188
95	intl_clone_name	International Clone Name
96	embl_version	EMBL Version
ml6's avatar
ml6 committed
189
97	chr	Chromosome Name	Chromosome Name Contained in the Assembly
190
98	equiv_asm	Equivalent EnsEMBL assembly	For full chromosomes made from NCBI AGPs
Damian Smedley's avatar
Damian Smedley committed
191 192 193 194 195 196 197 198 199
99	GeneNo_ncRNA	ncRNA Gene Count	Number of ncRNA Genes

# Ig segment gene counts for seq regions stats script ds5 2/2/07
100	GeneNo_IgSeg	Ig segment Gene Count	Number of Ig segment Genes

# cat missing atts
109	HitSimilarity	hit similarity	percentage id to parent transcripts
110	HitCoverage	hit coverage	coverage of parent transcripts
111	PropNonGap	proportion non gap	proportion non gap
ml6's avatar
ml6 committed
200
112	NumStops	number of stops
Damian Smedley's avatar
Damian Smedley committed
201
113	GapExons	gap exons	number of gap exons
ml6's avatar
ml6 committed
202
114	SourceTran	source transcript	source transcript
Damian Smedley's avatar
Damian Smedley committed
203 204 205
115	EndNotFound	end not found	end not found
116	StartNotFound	start not found	start not found

Steve Trevanion's avatar
Steve Trevanion committed
206 207 208 209 210 211
117	Frameshift Fra	Frameshift modelled as intron

# Other Vega attribs
118	ensembl_name	Ensembl name	Name of equivalent Ensembl chromosome
119	NoAnnotation	NoAnnotation	Clones without manual annotation
120	hap_contig	Haplotype contig	Contig present on a haplotype
ml6's avatar
ml6 committed
212 213 214 215 216 217 218

# loutre attribs added by ml6
121	annotated	Clone Annotation Status
122	keyword	Clone Keyword
123	hidden_remark	Hidden Remark
124	mRNA_start_NF	mRNA start not found
125	mRNA_end_NF	mRNA end not found
Steve Trevanion's avatar
Steve Trevanion committed
219 220
126	cds_start_NF	CDS start not found
127	cds_end_NF	CDS end not found
ml6's avatar
ml6 committed
221 222
128	write_access	Write access for Sequence Set	1 for writable , 0 for read-only
129	hidden	Hidden Sequence Set
Steve Trevanion's avatar
Steve Trevanion committed
223 224

# loutre attribs for vega production (st3)
Steve Trevanion's avatar
Steve Trevanion committed
225
130	vega_name	Vega name	Vega seq_region.name
226
131	vega_export_mod	Export mode	E (External), I (Internal) etc
Steve Trevanion's avatar
Steve Trevanion committed
227
132	vega_release	Vega release	Vega release number
Chao-Kung Chen's avatar
Chao-Kung Chen committed
228 229

# loutre attribs for assembly_tags (ck1)
Chao-Kung Chen's avatar
Chao-Kung Chen committed
230 231 232 233
133	atag_CLE	Clone_left_end	Clone_lef_end feature marked in GAP database
134	atag_CRE	Clone_right_end	Clone_right_end feature marked in GAP database
135	atag_Misc	Misc	miscellaneous feature marked in GAP database
136	atag_Unsure	Unsure	region of uncertain DNA sequence marked in GAP database
234
137	MultAssem	Multiple Assembled seq region	Part of Seq Region is part of more than one assembly
Ian Sealy's avatar
Ian Sealy committed
235 236 237 238


140	wgs	WGS contig	WGS contig integrated into the map
141	bac	AGP clones	tiling path of clones
Glenn Proctor's avatar
Glenn Proctor committed
239 240 241 242

# Attribute for per-gene GC percentage

142	GeneGC	Gene GC	 Percentage GC content for this gene
Stephen Rice's avatar
Stephen Rice committed
243 244

# vega
245
143	TotAssemblyLeng	Finished sequence length	Length of the assembly not counting sequence gaps
246 247 248 249 250

# Drosophila, only where the translation provided by flybase differs from that in our database by ONE amino acid
144	amino_acid_sub	Amino acid substitution	In drosophila, some translations have been manually curated by FlyBase and a stop codon has been changed to an amino acid in order to prevent premature truncation.
# Drosophila. Sometimes sequences have been manually altered to remove one base, and this alters the whole translation
145	_rna_edit	rna_edit	RNA edit
Sarah Dyer's avatar
 
Sarah Dyer committed
251 252 253 254

#genebuild - databases of removed transcripts
146	kill_reason	Kill Reason	Reason why a transcript has been killed
147	strip_UTR	Strip UTR	Transcript needs bad UTR removing
Steve Trevanion's avatar
Steve Trevanion committed
255 256 257 258 259 260 261 262 263 264 265

# vega
148	TotAssLength	Finished sequence length	Finished Sequence
149	NovPsCount	novel_pseudogene	Number of Novel Pseudogenes
150	KnwnPsCount	known_pseudogene	Number of Known Pseudogenes
151	KnwnTPsCount	known_transcribed_pseudogene	Number of Known Transcribed Pseudogenes
152	TotPTCount	total_processed_transcript	Total Number of Processed Transcripts
153	TotPCCount	total_protein_coding	Total Number of Protein Coding
154	NovNcCount	novel_non_coding	Number of Novel Non Coding
155	KnwnPolyCount	known_polymorphic	Number of Known Polymorphic
156	NovPolyCount	novel_polymorphic	Number of Novel Polymorphic
Steve Trevanion's avatar
Steve Trevanion committed
266
157	TotIGGeneCount	total_IG_gene	Total Number of IG Genes
Steve Trevanion's avatar
Steve Trevanion committed
267 268 269 270
158	NovProcPsCount	novel_processed_pseudogene	Number of Novel Processed Pseudogenes
159	NovUnPsCount	novel_unprocessed_pseudogene	Number of Novel Unprocessed Pseudogenes
160	NovTPsCount	novel_transcribed_pseudogene	Number of Novel Transcribed Pseudogenes
161	NovTECCount	novel_TEC	Number of Novel TEC Genes
Steve Trevanion's avatar
IG case  
Steve Trevanion committed
271 272
162	KnwnIGGeneCount	IG_gene_KNOWN	Number of Known IG Genes
163	KnwnIGPsGeCount	IG_pseudogene_KNOWN	Number of Known IG Pseudogenes