Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
d528572a
Commit
d528572a
authored
20 years ago
by
Patrick Meidl
Browse files
Options
Downloads
Patches
Plain Diff
added glovar SNP stats
parent
783bd998
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/density_feature/vega_seq_region_stats.pl
+162
-204
162 additions, 204 deletions
misc-scripts/density_feature/vega_seq_region_stats.pl
with
162 additions
and
204 deletions
misc-scripts/density_feature/vega_seq_region_stats.pl
+
162
−
204
View file @
d528572a
use
strict
;
#!/usr/local/bin/perl
use
lib
'
../../modules/
','
../../../bioperl-live
';
=head1 NAME
use
Bio::EnsEMBL::DBSQL::
DBAdaptor
;
use
Bio::EnsEMBL::Lite::
DBAdaptor
;
use
Getopt::
Long
;
vega_seq_region_stats.pl -
script to calculate gene and snp numbers for mapview stats
my
(
$host
,
$user
,
$pass
,
$port
,
$dbname
);
=head1 SYNOPSIS
GetOptions
(
"
host=s
",
\
$host
,
"
user=s
",
\
$user
,
"
pass=s
",
\
$pass
,
"
port=i
",
\
$port
,
"
dbname=s
",
\
$dbname
);
my
$db
=
new
Bio::EnsEMBL::DBSQL::
DBAdaptor
(
-
host
=>
$host
,
-
user
=>
$user
,
-
port
=>
$port
,
-
pass
=>
$pass
,
-
dbname
=>
$dbname
);
=head1 DESCRIPTION
#
# Only run on database with genes
#
my
$sth
=
$db
->
prepare
(
"
select count(*) from gene
"
);
$sth
->
execute
();
my
(
$gene_count
)
=
$sth
->
fetchrow_array
();
=head1 LICENCE
if
(
!
$gene_count
)
{
print
STDERR
"
No gene density for
$dbname
.
\n
";
exit
();
}
This code is distributed under an Apache style licence:
Please see http://www.ensembl.org/code_licence.html for details
#
# and seq_regions
#
$sth
=
$db
->
prepare
(
"
select count(*) from seq_region
"
);
$sth
->
execute
();
my
(
$seq_region_count
)
=
$sth
->
fetchrow_array
();
if
(
!
$seq_region_count
)
{
print
STDERR
"
No seq_regions for
$dbname
.
\n
";
exit
();
}
=head1 AUTHOR
Patrick Meidl <pm2@sanger.ac.uk>
#my $snps_present = lite_attach( $db );
my
$snps_present
=
0
;
=head1 CONTACT
Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
my
$slice_adaptor
=
$db
->
get_SliceAdaptor
();
my
$attrib_adaptor
=
$db
->
get_AttributeAdaptor
();
=cut
use
strict
;
BEGIN
{
$ENV
{'
ENSEMBL_SERVERROOT
'}
=
"
../../..
";
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/conf
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/ensembl-compara/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/ensembl-draw/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/ensembl-external/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/ensembl-otter/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/ensembl/modules
");
unshift
(
@INC
,"
$ENV
{'ENSEMBL_SERVERROOT'}/bioperl-live
");
}
my
$top_slices
=
$slice_adaptor
->
fetch_all
(
"
toplevel
"
);
use
SiteDefs
;
use
EnsWeb
;
use
EnsEMBL::DB::
Core
;
use
Getopt::
Long
;
my
(
$species
,
$run
,
$help
);
&GetOptions
(
"
species=s
"
=>
\
$species
,
"
run=s
"
=>
\
$run
,
"
help
"
=>
\
$help
,
"
h
"
=>
\
$help
,
);
if
(
$help
||
!
(
$species
&&
$run
)){
print
qq(Usage:
./vega_seq_region_stats.pl
--species=Homo_sapiens
--run=gene,snp
[--help]\n\n)
;
exit
;
}
foreach
my
$slice
(
@$top_slices
)
{
my
$num_known_genes
=
0
;
my
$num_nov_CDS
=
0
;
my
$num_nov_trans
=
0
;
my
$num_tot_pseudo_genes
=
0
;
my
$num_unclass_pseudo_genes
=
0
;
my
$num_proc_pseudo_genes
=
0
;
my
$num_unproc_pseudo_genes
=
0
;
my
$num_put_trans
=
0
;
my
$num_pred_trans
=
0
;
my
$num_pred_Ig_pseudogenes
=
0
;
my
$num_Ig_segments
=
0
;
print
STDERR
"
Processing seq_region
",
$slice
->
seq_region_name
(),
"
\n
";
my
@genes
=
@
{
$slice
->
get_all_Genes
()};
foreach
my
$gene
(
@genes
)
{
my
$type
=
$gene
->
type
;
if
(
$type
eq
'
Known
')
{
$num_known_genes
++
;
}
elsif
(
$type
eq
'
Novel_CDS
')
{
$num_nov_CDS
++
;
}
elsif
(
$type
eq
'
Novel_Transcript
')
{
$num_nov_trans
++
;
}
elsif
(
$type
eq
'
Putative
')
{
$num_put_trans
++
;
}
elsif
(
$type
eq
'
Predicted_Gene
')
{
$num_pred_trans
++
;
}
elsif
(
$type
eq
'
Ig_Pseudogene_Segment
')
{
$num_pred_Ig_pseudogenes
++
;
}
elsif
(
$type
eq
'
Ig_Segment
')
{
$num_Ig_segments
++
;
}
elsif
(
$type
eq
'
Pseudogene
')
{
$num_unclass_pseudo_genes
++
;
}
elsif
(
$type
eq
'
Processed_pseudogene
')
{
$num_proc_pseudo_genes
++
;
}
elsif
(
$type
eq
'
Unprocessed_pseudogene
')
{
$num_unproc_pseudo_genes
++
;
}
$ENV
{'
ENSEMBL_SPECIES
'}
=
$species
;
if
(
$type
=~
/seudogene$/
)
{
$num_tot_pseudo_genes
++
;
}
}
## set db user/pass to allow write access
my
$db_ref
=
$
EnsWeb::
species_defs
->
databases
;
$db_ref
->
{'
ENSEMBL_DB
'}{'
USER
'}
=
$
EnsWeb::
species_defs
->
ENSEMBL_WRITE_USER
;
$db_ref
->
{'
ENSEMBL_DB
'}{'
PASS
'}
=
$
EnsWeb::
species_defs
->
ENSEMBL_WRITE_PASS
;
print
"
Slice
",
$slice
->
seq_region_name
(),
"
has the following features:
\n\n
";
print
"
known genes =
$num_known_genes
\n
";
print
"
novel coding sequences =
$num_nov_CDS
\n
";
print
"
novel transcripts =
$num_nov_trans
\n
";
print
"
putative transcripts =
$num_put_trans
\n
";
print
"
predicted transcripts =
$num_pred_trans
\n
";
print
"
total number of pseudogenes =
$num_tot_pseudo_genes
\n
";
print
"
\t
unclassified pseudogenes =
$num_unclass_pseudo_genes
\n
";
print
"
\t
processed pseudogenes =
$num_proc_pseudo_genes
\n
";
print
"
\t
unprocessed pseudogenes =
$num_unproc_pseudo_genes
\n
";
print
"
Ig Pseudogenes =
$num_pred_Ig_pseudogenes
\n
";
print
"
Ig Segments =
$num_Ig_segments
\n\n\n
";
## connect to databases
my
%run
;
@run
{
split
(
/,/
,
$run
)}
=
1
;
my
@dbs
=
qw(core)
;
push
@dbs
,
'
glovar
'
if
(
$run
{'
snp
'});
my
$databases
=
&
EnsEMBL::DB::Core::
get_databases
(
@dbs
);
die
"
Problem connecting to databases:
$databases
->{'error'}
\n
"
if
$databases
->
{'
error
'}
;
warn
"
Database error:
$databases
->{'non_fatal_error'}
\n
"
if
$databases
->
{'
non_fatal_error
'};
## get slice and attribute adaptors, loop over all toplevel slices
my
$slice_adaptor
=
$databases
->
{'
core
'}
->
get_SliceAdaptor
();
my
$attrib_adaptor
=
$databases
->
{'
core
'}
->
get_AttributeAdaptor
();
my
$top_slices
=
$slice_adaptor
->
fetch_all
("
toplevel
");
foreach
my
$slice
(
@$top_slices
)
{
my
%num
;
my
@attribs
;
print
STDERR
"
Processing seq_region
",
$slice
->
seq_region_name
(),
"
:
\n
";
## genes
if
(
$run
{'
gene
'})
{
print
STDERR
"
\t
Genes...
\n
";
my
@genes
=
@
{
$slice
->
get_all_Genes
()};
foreach
my
$gene
(
@genes
)
{
my
$type
=
$gene
->
type
;
$num
{
$type
}
++
;
if
(
$type
=~
/seudogene$/
)
{
$num
{'
Total_Pseudogenes
'}
++
;
}
}
print
STDERR
"
Slice
",
$slice
->
seq_region_name
(),
"
has the following features:
\n\n
";
foreach
my
$type
(
keys
%num
)
{
print
STDERR
"
$type
=
$num
{
$type
}
\n
";
}
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Known genes
',
-
CODE
=>
'
KnownGeneCount
',
-
VALUE
=>
$num
{'
Known
'},
-
DESCRIPTION
=>
'
Total Number of Known genes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Novel CDS
',
-
CODE
=>
'
NovelCDSCount
',
-
VALUE
=>
$num
{'
Novel_CDS
'},
-
DESCRIPTION
=>
'
Total Number of Novel CDSs
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Novel transcripts
',
-
CODE
=>
'
NovelTransCount
',
-
VALUE
=>
$num
{'
Novel_Transcript
'},
-
DESCRIPTION
=>
'
Total Number of Novel transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Putative transcripts
',
-
CODE
=>
'
PutTransCount
',
-
VALUE
=>
$num
{'
Putative
'},
-
DESCRIPTION
=>
'
Total Number of Putative transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Predicted transcripts
',
-
CODE
=>
'
PredTransCount
',
-
VALUE
=>
$num
{'
Predicted_Gene
'},
-
DESCRIPTION
=>
'
Total Number of Predicted transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Total Pseudogenes
',
-
CODE
=>
'
TotPsCount
',
-
VALUE
=>
$num
{'
Total_Pseudogenes
'},
-
DESCRIPTION
=>
'
Total Number of Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Unclassified pseudogenes
',
-
CODE
=>
'
UnclassPsCount
',
-
VALUE
=>
$num
{'
Pseudogene
'},
-
DESCRIPTION
=>
'
Number of Unclassified pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Processed pseudogenes
',
-
CODE
=>
'
ProcPsCount
',
-
VALUE
=>
$num
{'
Processed_pseudogene
'},
-
DESCRIPTION
=>
'
Number of Processed pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Unprocessed pseudogenes
',
-
CODE
=>
'
UnprocPsCount
',
-
VALUE
=>
$num
{'
Unprocessed_pseudogene
'},
-
DESCRIPTION
=>
'
Number of Unprocessed pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Ig Segments
',
-
CODE
=>
'
IgSegCount
',
-
VALUE
=>
$num
{'
Ig_Segment
'},
-
DESCRIPTION
=>
'
Total Number of Ig Segments
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Ig Pseudogene Segments
',
-
CODE
=>
'
IgPsSegCount
',
-
VALUE
=>
$num
{'
Ig_Pseudogene_Segment
'},
-
DESCRIPTION
=>
'
Total Number of Ig Pseudogene Segments
');
}
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Known Genes
',
-
CODE
=>
'
KnownGeneCount
',
-
VALUE
=>
$num_known_genes
,
-
DESCRIPTION
=>
'
Total Number of Known Genes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Novel CDS
',
-
CODE
=>
'
NovelCDSCount
',
-
VALUE
=>
$num_nov_CDS
,
-
DESCRIPTION
=>
'
Total Number of Novel CDSs
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Novel transcripts
',
-
CODE
=>
'
NovelTransCount
',
-
VALUE
=>
$num_nov_trans
,
-
DESCRIPTION
=>
'
Total Number of Novel Transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Putative transcripts
',
-
CODE
=>
'
PutTransCount
',
-
VALUE
=>
$num_put_trans
,
-
DESCRIPTION
=>
'
Total Number of Putative Transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Predicted transcripts
',
-
CODE
=>
'
PredTransCount
',
-
VALUE
=>
$num_pred_trans
,
-
DESCRIPTION
=>
'
Total Number of Predicted Transcripts
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Total Pseudogenes
',
-
CODE
=>
'
TotPsCount
',
-
VALUE
=>
$num_tot_pseudo_genes
,
-
DESCRIPTION
=>
'
Total Number of Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Unclassified Pseudogenes
',
-
CODE
=>
'
UnclassPsCount
',
-
VALUE
=>
$num_unclass_pseudo_genes
,
-
DESCRIPTION
=>
'
Number of Unclassified Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Processed Pseudogenes
',
-
CODE
=>
'
ProcPsCount
',
-
VALUE
=>
$num_proc_pseudo_genes
,
-
DESCRIPTION
=>
'
Number of Processed Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Unprocessed Pseudogenes
',
-
CODE
=>
'
UnprocPsCount
',
-
VALUE
=>
$num_unproc_pseudo_genes
,
-
DESCRIPTION
=>
'
Number of Unprocessed Pseudogenes
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Ig Segments
',
-
CODE
=>
'
IgSegCount
',
-
VALUE
=>
$num_Ig_segments
,
-
DESCRIPTION
=>
'
Total Number of Ig Segments
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
Ig Pseudogene Segments
',
-
CODE
=>
'
IgPsSegCount
',
-
VALUE
=>
$num_pred_Ig_pseudogenes
,
-
DESCRIPTION
=>
'
Total Number of Ig Pseudogene Segments
');
if
(
$snps_present
)
{
my
$snps
=
$slice
->
get_all_SNPs
();
## snps
if
(
$run
{'
snp
'})
{
print
STDERR
"
\t
SNPs...
\n
";
my
$snps
=
$slice
->
get_all_ExternalFeatures
('
GlovarSNP
');
push
@attribs
,
Bio::EnsEMBL::
Attribute
->
new
(
-
NAME
=>
'
SNP
Count
',
(
-
NAME
=>
'
SNP
s
',
-
CODE
=>
'
SNPCount
',
-
VALUE
=>
scalar
(
@$snps
),
-
DESCRIPTION
=>
'
Total Number of SNPs
');
...
...
@@ -209,43 +204,6 @@ sub print_chromo_stats {
}
}
#
# tries to attach lite.
#
sub
lite_attach
{
my
$db
=
shift
;
my
$core_db_name
;
$core_db_name
=
$db
->
dbname
();
if
(
$core_db_name
!~
/_core_/
)
{
return
0
;
}
#
# get a lost of all databases on that server
#
my
$sth
=
$db
->
prepare
(
"
show databases
"
);
$sth
->
execute
();
my
$all_db_names
=
$sth
->
fetchall_arrayref
();
my
%all_db_names
=
map
{(
$_
->
[
0
]
,
1
)}
@$all_db_names
;
my
$snp_db_name
=
$core_db_name
;
$snp_db_name
=~
s/_core_/_lite_/
;
if
(
!
exists
$all_db_names
{
$snp_db_name
}
)
{
return
0
;
}
my
$snp_db
=
Bio::EnsEMBL::Lite::
DBAdaptor
->
new
(
-
host
=>
$db
->
host
(),
-
user
=>
$db
->
username
(),
-
pass
=>
$db
->
password
(),
-
port
=>
$db
->
port
(),
-
dbname
=>
$snp_db_name
);
$db
->
add_db_adaptor
(
"
lite
",
$snp_db
);
return
1
;
}
1
;
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment