Newer
Older
Rhoda Kinsella
committed
### This script is used to mark exons which are used in more than one
### transcript linked to a single gene. The script does not limit
### by differing biotypes. Exons are identified as being the same if they
### occupy identical genomic locations if the -uselocations flag is specified
Rhoda Kinsella
committed
use strict;
use warnings;
use Getopt::Long;
use DBI;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
sub usage {
print("Command line switches:\n");
print("\t--dbhost=<host> \tDatabase server host\n");
print("\t--dbport=<port> \tDatabase server port (optional)\n");
print("\t--dbuser=<user> \tDatabase user\n");
print("\t--dbpass=<password> \tUser password (optional)\n");
print("\t--dbpattern=<regex> "
. "\tRegular expresssion to match database names\n" );
print("\t--uselocations "
. "\tUse locations to find indentical exons rather than DB identifiers(optional)\n" );
Rhoda Kinsella
committed
print("\t--help "
. "\tDisplays this info and exits (optional)\n" );
exit;
}
my ( $dbhost, $dbport, $dbuser, $dbpass, $dbpattern, $uselocations );
Rhoda Kinsella
committed
GetOptions(
'dbhost|host=s' => \$dbhost,
'dbport|port=i' => \$dbport,
'dbuser|user=s' => \$dbuser,
'dbpass|pass=s' => \$dbpass,
'dbpattern|pattern=s' => \$dbpattern,
'uselocations' => \$uselocations,
Rhoda Kinsella
committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
'help|h' => \&usage
);
$dbport ||= 3306;
if ( !( defined($dbhost) && defined($dbuser) && defined($dbpattern) ) )
{
usage();
}
my $dsn = sprintf( 'dbi:mysql:host=%s;port=%d', $dbhost, $dbport );
my $dbh = DBI->connect( $dsn, $dbuser, $dbpass );
my @dbnames =
map { $_->[0] } @{ $dbh->selectall_arrayref('SHOW DATABASES') };
foreach my $dbname (@dbnames) {
if ( $dbname !~ /$dbpattern/ ) { next }
printf( "Connecting to '%s'\n", $dbname );
my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(
'-host' => $dbhost,
'-port' => $dbport,
'-user' => $dbuser,
'-pass' => $dbpass,
'-dbname' => $dbname,
'-species' => $dbname
);
my $gene_adaptor = $dba->get_GeneAdaptor();
# Retrieve all Gene dbIDs
my @gene_dbIDs = sort { $a <=> $b } @{ $gene_adaptor->list_dbIDs() };
my $gene_count = scalar(@gene_dbIDs);
printf( "There are %d genes to go through...\n", $gene_count );
my ( $update_0, $update_1 ) = ( 0, 0 );
my ( $gene_counter, $exon_counter ) = ( 0, 0 );
while (
my @gene_list = @{
$gene_adaptor->fetch_all_by_dbID_list(
[ splice( @gene_dbIDs, 0, 1000 ) ] ) } )
{
while ( my $gene = shift(@gene_list) ) {
my @transcripts = @{ $gene->get_all_Transcripts() };
my $transcript_count = scalar(@transcripts);
my %exon_count;
my %exon_object;
foreach my $transcript (@transcripts) {
foreach my $exon ( @{ $transcript->get_all_Exons() } ) {
my $key = exon_key($exon);
++$exon_count{$key};
$exon_object{$key} = $exon;
Rhoda Kinsella
committed
}
}
foreach my $exon_key ( keys(%exon_count) ) {
my $exon = $exon_object{$exon_key};
my $is_constitutive = $exon->is_constitutive();
if ( $exon_count{$exon_key} == $transcript_count ) {
if ( !$exon->is_constitutive() ) {
$is_constitutive = 1;
Rhoda Kinsella
committed
++$update_1;
}
} else {
if ( $exon_object{$exon_key}->is_constitutive() ) {
$is_constitutive = 0;
Rhoda Kinsella
committed
++$update_0;
}
}
-SQL => 'update exon set is_constitutive = ? where exon_id =?',
-PARAMS => [$is_constitutive, $exon->dbID()]
);
Rhoda Kinsella
committed
if ( ( ++$exon_counter % 1000 ) == 0 ) {
printf(
"After %d exons (%d genes, %.3g%%):\n"
. "\tupdated %d to constitutive, "
. "%d to non-constitutive\n",
$exon_counter, $gene_counter, 100*$gene_counter/$gene_count,
$update_1, $update_0
);
}
Rhoda Kinsella
committed
++$gene_counter;
} ## end while ( my $gene = shift(...))
} ## end while ( my @gene_list = @...)
Rhoda Kinsella
committed
print( '-' x 4, '{ ', $dbname, ' }',
'-' x ( 80 - ( length($dbname) + 4 ) ), "\n" );
print("Summary:\n");
printf( "\t%d exons in total, %d genes\n",
$exon_counter, $gene_counter );
printf( "\tUpdated %d exons to constitutive\n", $update_1 );
printf( "\tUpdated %d exons to non-constitutive\n", $update_0 );
Rhoda Kinsella
committed
print("\n");
} ## end foreach my $dbname (@dbnames)
sub exon_key {
my ($e) = @_;
if($uselocations) {
return join(q{:},
($e->slice()->name() ? $e->slice()->name() : 'undef'),
$e->seq_region_start(),
$e->seq_region_end(),
$e->seq_region_strand()
);
}
return $e->dbID();
}