Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
3ec37fc5
Commit
3ec37fc5
authored
Oct 07, 2004
by
Patrick Meidl
Browse files
changes from branch-vega-23-dev
parent
f6782625
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
676 additions
and
198 deletions
+676
-198
misc-scripts/density_feature/glovar_snp_density.pl
misc-scripts/density_feature/glovar_snp_density.pl
+69
-20
misc-scripts/density_feature/glovar_snp_wrapper.pl
misc-scripts/density_feature/glovar_snp_wrapper.pl
+8
-3
misc-scripts/density_feature/vega_gene_density.pl
misc-scripts/density_feature/vega_gene_density.pl
+201
-174
misc-scripts/density_feature/vega_percent_gc_calc.pl
misc-scripts/density_feature/vega_percent_gc_calc.pl
+196
-0
misc-scripts/density_feature/vega_repeat_coverage_calc.pl
misc-scripts/density_feature/vega_repeat_coverage_calc.pl
+201
-0
misc-scripts/repeats/vega_repeat_libraries.pl
misc-scripts/repeats/vega_repeat_libraries.pl
+1
-1
No files found.
misc-scripts/density_feature/glovar_snp_density.pl
View file @
3ec37fc5
...
...
@@ -3,7 +3,8 @@
=head1 NAME
glovar_snp_density.pl -
script to calculate glovar SNP density and stats for Vega
Script to calculate glovar SNP density and stats (and optioanlly prepare AV
index dumps) for Vega.
=head1 SYNOPSIS
...
...
@@ -11,13 +12,15 @@ script to calculate glovar SNP density and stats for Vega
--species=Homo_sapiens
[--chr=6,13,14]
[--dry-run|-n]
[--avdump|-a]
[--help|-h]
=head1 DESCRIPTION
This script calculates Glovar SNP densities and total numbers per chromosome
for use in mapview. Can be run for individual chromosomes if desired (default:
all chromosomes).
all chromosomes). It optionally also dumps SNPs into a file for generating the
AV search index.
=head1 LICENCE
...
...
@@ -55,12 +58,14 @@ use Bio::EnsEMBL::DensityType;
use
Bio::EnsEMBL::
DensityFeature
;
use
POSIX
;
my
(
$species
,
$chr
,
$dry
,
$help
);
my
(
$species
,
$chr
,
$dry
,
$avdump
,
$help
);
&GetOptions
(
"
species=s
"
=>
\
$species
,
"
chr=s
"
=>
\
$chr
,
"
dry-run
"
=>
\
$dry
,
"
n
"
=>
\
$dry
,
"
avdump
"
=>
\
$avdump
,
"
a
"
=>
\
$avdump
,
"
help
"
=>
\
$help
,
"
h
"
=>
\
$help
,
);
...
...
@@ -71,18 +76,19 @@ if($help || !$species){
--species=Homo_sapiens
[--chr=6,13,14]
[--dry-run|-n]
[--avdump|-a]
[--help|-h]\n\n)
;
exit
;
}
$ENV
{'
ENSEMBL_SPECIES
'}
=
$species
;
#
# set db user/pass to allow write access
# set db user/pass to allow write access
my
$db_ref
=
$
EnsWeb::
species_defs
->
databases
;
$db_ref
->
{'
ENSEMBL_DB
'}{'
USER
'}
=
$
EnsWeb::
species_defs
->
ENSEMBL_WRITE_USER
;
$db_ref
->
{'
ENSEMBL_DB
'}{'
PASS
'}
=
$
EnsWeb::
species_defs
->
ENSEMBL_WRITE_PASS
;
#
# connect to databases
# connect to databases
my
$databases
=
&
EnsEMBL::DB::Core::
get_databases
(
qw(core glovar)
);
die
"
Problem connecting to databases:
$databases
->{'error'}
\n
"
...
...
@@ -90,33 +96,33 @@ die "Problem connecting to databases: $databases->{'error'}\n"
warn
"
Database error:
$databases
->{'non_fatal_error'}
\n
"
if
$databases
->
{'
non_fatal_error
'};
#
# get the adaptors needed
# get the adaptors needed
my
$dfa
=
$databases
->
{'
core
'}
->
get_DensityFeatureAdaptor
;
my
$dta
=
$databases
->
{'
core
'}
->
get_DensityTypeAdaptor
;
my
$aa
=
$databases
->
{'
core
'}
->
get_AnalysisAdaptor
;
my
$attrib_adaptor
=
$databases
->
{'
core
'}
->
get_AttributeAdaptor
;
my
$slice_adaptor
=
$databases
->
{'
core
'}
->
get_SliceAdaptor
;
#
# which chromosomes do we run?
# which chromosomes do we run?
my
@top_slices
;
if
(
$chr
)
{
#
#
run chromosomes specified on commandline
# run chromosomes specified on commandline
foreach
(
split
("
,
",
$chr
))
{
push
@top_slices
,
$slice_adaptor
->
fetch_by_region
("
toplevel
",
$_
);
}
}
else
{
#
#
run all chromosomes for this species
# run all chromosomes for this species
@top_slices
=
@
{
$slice_adaptor
->
fetch_all
("
toplevel
")};
}
#
# calculate block size (assuming 4000 blocks per genome)
# calculate block size (assuming 4000 blocks per genome)
my
(
$block_size
,
$genome_size
);
for
my
$slice
(
@
{
$slice_adaptor
->
fetch_all
("
toplevel
")}
)
{
$genome_size
+=
$slice
->
length
;
}
$block_size
=
int
(
$genome_size
/
4000
);
#
# analysis
# analysis
my
$analysis
=
new
Bio::EnsEMBL::
Analysis
(
-
program
=>
"
glovar_snp_density.pl
",
-
database
=>
"
vega
",
...
...
@@ -125,20 +131,40 @@ my $analysis = new Bio::EnsEMBL::Analysis (
-
logic_name
=>
"
snpDensity
");
$aa
->
store
(
$analysis
)
unless
$dry
;
#
# density type
# density type
my
$dt
=
Bio::EnsEMBL::
DensityType
->
new
(
-
analysis
=>
$analysis
,
-
block_size
=>
$block_size
,
-
value_type
=>
'
sum
');
$dta
->
store
(
$dt
)
unless
$dry
;
#
# loop over chromosomes
# loop over chromosomes
my
@chr
;
foreach
my
$sl
(
@top_slices
)
{
push
@chr
,
$sl
->
seq_region_name
;
}
print
STDERR
"
\n
Available chromosomes:
@chr
\n
";
# settings for AV index dump
use
constant
SNP_LINE
=>
join
("
\t
",
'
Glovar SNP
',
'
%s
',
'
/%s/snpview?snp=%s&source=glovar
',
'
%s
',
"
Single nucleotide polymorphism (SNP) %s [Alleles: %s]. Alternative IDs: %s.
\n
"
);
if
(
$avdump
)
{
print
STDERR
"
Preparing directories for AV index dump...
\n
";
my
$dumpdir
=
"
$ENV
{'ENSEMBL_SERVERROOT'}/utils/indexing/input
";
unless
(
-
e
$dumpdir
)
{
mkdir
$dumpdir
,
0777
or
die
"
Could not creat directory
$dumpdir
: $!
\n
";
}
unless
(
-
e
"
$dumpdir
/
$species
")
{
mkdir
"
$dumpdir
/
$species
",
0777
or
die
"
Could not creat directory
$dumpdir
/
$species
: $!
\n
";
}
open
(
AV
,
"
>>
$dumpdir
/
$species
/SNP.txt
")
or
die
"
Could not open
$dumpdir
/
$species
/SNP.txt for writing: $!
\n
";
print
STDERR
"
Done.
\n
";
}
my
(
$current_start
,
$current_end
);
foreach
my
$slice
(
@top_slices
)
{
$current_start
=
1
;
...
...
@@ -149,7 +175,7 @@ foreach my $slice (@top_slices) {
print
STDERR
"
\n
SNP densities for chr
$chr
with block size
$block_size
\n
";
print
STDERR
"
Start at
"
.
`
date
`;
#
#
loop over blocks
# loop over blocks
while
(
$current_start
<=
$slice
->
end
)
{
$i
++
;
$current_end
=
$current_start
+
$block_size
-
1
;
...
...
@@ -166,12 +192,34 @@ foreach my $slice (@top_slices) {
$current_start
=
$current_end
+
1
;
next
;
}
#
#
only count snps that don't overlap slice start
#
#
also, avoid duplicate counting
my
%snps
=
map
{
"
$_
->
name
=> 1
"
if
(
$_
->
start
>=
1
)
}
@
{
$snps
};
# only count snps that don't overlap slice start
# also, avoid duplicate counting
my
%snps
=
map
{
"
$_
->
display_id
=> 1
"
if
(
$_
->
start
>=
1
)
}
@
{
$snps
};
$count
=
scalar
(
keys
%snps
);
## density
# AV index dump
if
(
$avdump
)
{
foreach
my
$snpo
(
@
{
$snps
})
{
next
if
(
$snpo
->
start
<
1
);
my
$snpid
=
$snpo
->
display_id
;
my
(
@IDs
,
@desc
);
foreach
my
$link
(
$snpo
->
each_DBLink
)
{
push
@IDs
,
$link
->
primary_id
;
push
@desc
,
$link
->
database
.
"
:
"
.
$link
->
primary_id
;
}
print
AV
sprintf
SNP_LINE
,
$snpid
,
$species
,
$snpid
,
join
("
",
@IDs
),
$snpid
,
$snpo
->
alleles
,
join
("
,
",
@desc
)
;
}
}
# density
my
$df
=
Bio::EnsEMBL::
DensityFeature
->
new
(
-
seq_region
=>
$slice
,
-
start
=>
$current_start
,
...
...
@@ -182,12 +230,12 @@ foreach my $slice (@top_slices) {
$dfa
->
store
(
$df
)
unless
$dry
;
$total
+=
$count
;
#
#
logging
# logging
print
STDERR
"
Chr:
$chr
| Bin:
$i
/
$bins
| Count:
$count
|
";
print
STDERR
"
Mem:
"
.
`
ps $$ -o vsz |tail -1
`;
}
#
#
stats
# stats
print
STDERR
"
Total for chr
$chr
:
$total
\n
";
my
$stat
=
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
SNPs
',
...
...
@@ -196,6 +244,7 @@ foreach my $slice (@top_slices) {
-
DESCRIPTION
=>
'
Total Number of SNPs
');
$attrib_adaptor
->
store_on_Slice
(
$slice
,
[
$stat
])
unless
$dry
;
}
close
AV
if
$avdump
;
print
STDERR
"
\n
All done at
"
.
`
date
`
.
"
\n
";
misc-scripts/density_feature/glovar_snp_wrapper.pl
View file @
3ec37fc5
...
...
@@ -10,12 +10,13 @@ Wrapper for glovar_snp_density.pl
./glovar_snp_density.pl
--species=Homo_sapiens
[--dry-run|-n]
[--avdump|-a]
=head1 DESCRIPTION
Wrapper for glovar_snp_density.pl to run it chromosome by chromosome. This is
an attempt to avoid high memory footprints caused by a memory leak somerwhere
in the API
..
.
in the API
. See glovar_snp_density.pl for more detailed documentation
.
=head1 LICENCE
...
...
@@ -49,17 +50,20 @@ use SiteDefs;
use
EnsWeb
;
use
Getopt::
Long
;
my
(
$species
,
$dry
);
my
(
$species
,
$dry
,
$avdump
);
&GetOptions
(
"
species=s
"
=>
\
$species
,
"
dry-run
"
=>
\
$dry
,
"
n
"
=>
\
$dry
,
"
avdump
"
=>
\
$avdump
,
"
a
"
=>
\
$avdump
,
);
unless
(
$species
)
{
print
qq(Usage:
./glovar_snp_density.pl
--species=Homo_sapiens
[--avdump|-a]
[--dry-run|-n]\n\n)
;
exit
;
}
...
...
@@ -69,8 +73,9 @@ $ENV{'ENSEMBL_SPECIES'} = $species;
## run glovar_snp_density.pl for each chromsome in this species
my
$command
=
"
./glovar_snp_density.pl --species=
$species
";
$command
.=
"
-n
"
if
(
$dry
);
$command
.=
"
-a
"
if
(
$avdump
);
foreach
my
$chr
(
@
{
$
EnsWeb::
species_defs
->
ENSEMBL_CHROMOSOMES
})
{
warn
"
$command
--chr=
$chr
\n
";
system
("
$command
--chr=
$chr
");
system
("
$command
--chr=
$chr
")
==
0
or
die
"
$command
--chr=
$chr
failed: $!
\n
"
;
}
misc-scripts/density_feature/vega_gene_density.pl
View file @
3ec37fc5
...
...
@@ -102,17 +102,33 @@ my $slice_adaptor = $db->get_SliceAdaptor;
my
$top_slices
=
$slice_adaptor
->
fetch_all
('
toplevel
');
## determine blocksize, assuming you want 150 blocks for the smallest
## chromosome
my
(
$block_count
,
$genome_size
,
$block_size
);
my
(
@chr
,
$block_size
,
$min_chr
);
## chromosome over 5Mb in size. Use an extra, smaller bin size for chromosomes smaller than 5Mb
my
$big_chr
=
[]
;
my
$small_chr
=
[]
;
my
(
@big_chr_names
,
$big_block_size
,
$min_big_chr
);
my
(
@small_chr_names
,
$small_block_size
,
$min_small_chr
);
for
my
$slice
(
@$top_slices
)
{
if
(
!
$min_chr
or
(
$min_chr
>
$slice
->
length
))
{
$min_chr
=
$slice
->
length
;
if
(
$slice
->
length
<
5000000
)
{
if
(
!
$min_small_chr
or
(
$min_small_chr
>
$slice
->
length
))
{
$min_small_chr
=
$slice
->
length
;
}
push
@small_chr_names
,
$slice
->
seq_region_name
;
push
@
{
$small_chr
->
[
0
]},
$slice
;
}
push
@chr
,
$slice
->
seq_region_name
;
if
(
!
$min_big_chr
or
(
$min_big_chr
>
$slice
->
length
)
&&
$slice
->
length
>
5000000
)
{
$min_big_chr
=
$slice
->
length
;
}
push
@big_chr_names
,
$slice
->
seq_region_name
;
push
@
{
$big_chr
->
[
0
]},
$slice
;
}
$block_size
=
int
(
$min_chr
/
150
);
print
STDERR
"
\n
Available chromosomes:
@chr
\n
";
$big_block_size
=
int
(
$min_big_chr
/
150
);
push
@
{
$big_chr
},
$big_block_size
;
$small_block_size
=
int
(
$min_small_chr
/
150
);
push
@
{
$small_chr
},
$small_block_size
;
print
STDERR
"
\n
Available chromosomes using block size of
$big_block_size
:
@big_chr_names
\n
";
print
STDERR
"
\n
Available chromosomes using block size of
$small_block_size
:
@small_chr_names
\n
";
## gene types
my
%gene_types
=
(
...
...
@@ -137,176 +153,187 @@ print STDERR join(" ", sort keys %gene_types);
print
STDERR
"
\n
";
## create analysis and density type objects
my
%dtcache
;
foreach
my
$type
(
keys
%gene_types
)
{
my
$analysis
=
new
Bio::EnsEMBL::
Analysis
(
-
program
=>
"
vega_gene_density.pl
",
-
database
=>
"
ensembl
",
-
gff_source
=>
"
vega_gene_density.pl
",
-
gff_feature
=>
"
density
",
-
logic_name
=>
$gene_types
{
$type
});
$aa
->
store
(
$analysis
)
unless
$dry
;
$analysis
=
$aa
->
fetch_by_logic_name
(
$gene_types
{
$type
});
my
$dt
=
Bio::EnsEMBL::
DensityType
->
new
(
-
analysis
=>
$analysis
,
-
block_size
=>
$block_size
,
-
value_type
=>
'
sum
');
$dta
->
store
(
$dt
)
unless
$dry
;
$dtcache
{
$gene_types
{
$type
}}
=
$dt
;
my
%dtcache
;
foreach
my
$block_size
(
$big_block_size
,
$small_block_size
)
{
eval
{
foreach
my
$type
(
keys
%gene_types
)
{
my
$analysis
=
new
Bio::EnsEMBL::
Analysis
(
-
program
=>
"
vega_gene_density.pl
",
-
database
=>
"
ensembl
",
-
gff_source
=>
"
vega_gene_density.pl
",
-
gff_feature
=>
"
density
",
-
logic_name
=>
$gene_types
{
$type
});
$aa
->
store
(
$analysis
)
unless
$dry
;
$analysis
=
$aa
->
fetch_by_logic_name
(
$gene_types
{
$type
});
my
$dt
=
Bio::EnsEMBL::
DensityType
->
new
(
-
analysis
=>
$analysis
,
-
block_size
=>
$block_size
,
-
value_type
=>
'
sum
');
$dta
->
store
(
$dt
)
unless
$dry
;
$dtcache
{
$block_size
}{
$gene_types
{
$type
}}
=
$dt
;
}
}
}
## loop over chromosomes
## loop over chromosomes, doing big ones then small ones
my
(
$current_start
,
$current_end
);
foreach
my
$slice
(
@$top_slices
){
$current_start
=
1
;
my
$chr
=
$slice
->
seq_region_name
;
my
(
%total
,
$i
,
%gene_names
);
my
$bins
=
POSIX::
ceil
(
$slice
->
end
/
$block_size
);
print
STDERR
"
\n
Gene densities for chr
$chr
with block size
$block_size
\n
";
print
STDERR
"
Start at
"
.
`
date
`;
## loop over blocks
my
@density_features
;
while
(
$current_start
<=
$slice
->
end
)
{
$i
++
;
$current_end
=
$current_start
+
$block_size
-
1
;
if
(
$current_end
>
$slice
->
end
)
{
$current_end
=
$slice
->
end
;
}
my
$sub_slice
=
$slice
->
sub_Slice
(
$current_start
,
$current_end
);
my
%num
=
();
## count genes by type
my
$genes
;
eval
{
$genes
=
$sub_slice
->
get_all_Genes
;
};
if
(
$@
)
{
warn
$@
;
$current_start
=
$current_end
+
1
;
next
;
}
foreach
my
$gene
(
@
{
$genes
})
{
## only count genes that don't overlap the subslice start
## (since these were already counted in the last bin)
if
(
$gene
->
start
>=
1
)
{
$total
{
$gene
->
type
}
++
;
}
$num
{
$gene_types
{
$gene
->
type
}}
++
;
}
## create DensityFeature objects for each type
foreach
my
$type
(
keys
%density_types
)
{
push
@density_features
,
Bio::EnsEMBL::
DensityFeature
->
new
(
-
seq_region
=>
$slice
,
-
start
=>
$current_start
,
-
end
=>
$current_end
,
-
density_type
=>
$dtcache
{
$type
},
-
density_value
=>
$num
{
$type
}
||
0
);
}
$current_start
=
$current_end
+
1
;
## logging
print
STDERR
"
Chr:
$chr
| Bin:
$i
/
$bins
| Counts:
";
print
STDERR
join
("
,
",
map
{
$num
{
$gene_types
{
$_
}}
||
0
}
sort
keys
%gene_types
);
print
STDERR
"
|
";
print
STDERR
"
Mem:
"
.
`
ps $$ -o vsz |tail -1
`;
foreach
my
$object
(
$big_chr
,
$small_chr
)
{
eval
{
my
$block_size
=
$object
->
[
1
];
foreach
my
$slice
(
@
{
$object
->
[
0
]}){
$current_start
=
1
;
my
$chr
=
$slice
->
seq_region_name
;
my
(
%total
,
$i
,
%gene_names
);
my
$bins
=
POSIX::
ceil
(
$slice
->
end
/
$block_size
);
print
STDERR
"
\n
Gene densities for chr
$chr
with block size
$block_size
\n
";
print
STDERR
"
Start at
"
.
`
date
`;
## loop over blocks
my
@density_features
;
while
(
$current_start
<=
$slice
->
end
)
{
$i
++
;
$current_end
=
$current_start
+
$block_size
-
1
;
if
(
$current_end
>
$slice
->
end
)
{
$current_end
=
$slice
->
end
;
}
my
$sub_slice
=
$slice
->
sub_Slice
(
$current_start
,
$current_end
);
my
%num
=
();
## count genes by type
my
$genes
;
eval
{
$genes
=
$sub_slice
->
get_all_Genes
;
};
if
(
$@
)
{
warn
$@
;
$current_start
=
$current_end
+
1
;
next
;
}
foreach
my
$gene
(
@
{
$genes
})
{
## only count genes that don't overlap the subslice start
## (since these were already counted in the last bin)
if
(
$gene
->
start
>=
1
)
{
$total
{
$gene
->
type
}
++
;
}
$num
{
$gene_types
{
$gene
->
type
}}
++
;
}
## create DensityFeature objects for each type
foreach
my
$type
(
keys
%density_types
)
{
push
@density_features
,
Bio::EnsEMBL::
DensityFeature
->
new
(
-
seq_region
=>
$slice
,
-
start
=>
$current_start
,
-
end
=>
$current_end
,
-
density_type
=>
$dtcache
{
$block_size
}{
$type
},
-
density_value
=>
$num
{
$type
}
||
0
);
}
$current_start
=
$current_end
+
1
;
## logging
print
STDERR
"
Chr:
$chr
| Bin:
$i
/
$bins
| Counts:
";
print
STDERR
join
("
,
",
map
{
$num
{
$gene_types
{
$_
}}
||
0
}
sort
keys
%gene_types
);
print
STDERR
"
|
";
print
STDERR
"
Mem:
"
.
`
ps $$ -o vsz |tail -1
`;
}
## store DensityFeatures for the chromosome
$dfa
->
store
(
@density_features
)
unless
$dry
;
## stats
my
@attribs
;
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
12:Known genes
',
-
CODE
=>
'
KnownGeneCount
',
-
VALUE
=>
$total
{'
Known
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Known genes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
14:Novel CDS
',
-
CODE
=>
'
NovelCDSCount
',
-
VALUE
=>
$total
{'
Novel_CDS
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Novel CDSs
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
16:Novel transcripts
',
-
CODE
=>
'
NovelTransCount
',
-
VALUE
=>
$total
{'
Novel_Transcript
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Novel transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
24:Putative transcripts
',
-
CODE
=>
'
PutTransCount
',
-
VALUE
=>
$total
{'
Putative
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Putative transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
30:Predicted transcripts
',
-
CODE
=>
'
PredTransCount
',
-
VALUE
=>
$total
{'
Predicted_Gene
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Predicted transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
26:Ig segments
',
-
CODE
=>
'
IgSegCount
',
-
VALUE
=>
$total
{'
Ig_Segment
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Ig Segments
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
28:Ig pseudogene Segments
',
-
CODE
=>
'
IgPsSegCount
',
-
VALUE
=>
$total
{'
Ig_Pseudogene_Segment
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Ig Pseudogene Segments
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
18:Total pseudogenes
',
-
CODE
=>
'
TotPsCount
',
-
VALUE
=>
$total
{'
Pseudogene
'}
+
$total
{'
Processed_pseudogene
'}
+
$total
{'
Unprocessed_pseudogene
'
||
0
},
-
DESCRIPTION
=>
'
Total Number of Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
20:Processed pseudogenes
',
-
CODE
=>
'
ProcPsCount
',
-
VALUE
=>
$total
{'
Processed_pseudogene
'}
||
0
,
-
DESCRIPTION
=>
'
Number of Processed pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
22:Unprocessed pseudogenes
',
-
CODE
=>
'
UnprocPsCount
',
-
VALUE
=>
$total
{'
Unprocessed_pseudogene
'}
||
0
,
-
DESCRIPTION
=>
'
Number of Unprocessed pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
13:Known genes (in progress)
',
-
CODE
=>
'
KnwnprogCount
',
-
VALUE
=>
$total
{'
Known_in_progress
'}
||
0
,
-
DESCRIPTION
=>
'
Number of Known Genes in progress
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
15:Novel CDS (in progress)
',
-
CODE
=>
'
NovCDSprogCount
',
-
VALUE
=>
$total
{'
Novel_CDS_in_progress
'}
||
0
,
-
DESCRIPTION
=>
'
Number of novel CDS in progress
');
#only store unclassified pseudogenes if there are no processed and unprocessed pseudos, ie if
#total pseudos eq pseudos
unless
(
$total
{'
Unprocessed_pseudogene
'}
==
0
&&
$total
{'
Processed_pseudogene
'}
==
0
)
{
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
23:Unclassified pseudogenes
',
-
CODE
=>
'
UnclassPsCount
',
-
VALUE
=>
$total
{'
Pseudogene
'}
||
0
,
-
DESCRIPTION
=>
'
Number of Unclassified pseudogenes
');
}
$attrib_adaptor
->
store_on_Slice
(
$slice
,
\
@attribs
)
unless
$dry
;
print
STDERR
"
Total for chr
$chr
:
\n
";
print
STDERR
map
{
"
\t
$_
=>
$total
{
$_
}
\n
"
}
sort
keys
%total
;
}
}
## store DensityFeatures for the chromosome
$dfa
->
store
(
@density_features
)
unless
$dry
;
## stats
my
@attribs
;
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
12:Known genes
',
-
CODE
=>
'
KnownGeneCount
',
-
VALUE
=>
$total
{'
Known
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Known genes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
14:Novel CDS
',
-
CODE
=>
'
NovelCDSCount
',
-
VALUE
=>
$total
{'
Novel_CDS
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Novel CDSs
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
16:Novel transcripts
',
-
CODE
=>
'
NovelTransCount
',
-
VALUE
=>
$total
{'
Novel_Transcript
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Novel transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
24:Putative transcripts
',
-
CODE
=>
'
PutTransCount
',
-
VALUE
=>
$total
{'
Putative
'}
||
0
,
-
DESCRIPTION
=>
'
Total Number of Putative transcripts
');