Skip to content
Snippets Groups Projects
Commit aeb6dd59 authored by Gautier Koscielny's avatar Gautier Koscielny
Browse files

Option output_file|o added to store the GFF output stream in a specific file.

parent 672ce2c5
No related branches found
No related tags found
No related merge requests found
......@@ -30,7 +30,7 @@
=head1 SYNOPSIS
Fetch_gff.pl -dbhost host -dbuser ensro -dbname homo_sapiens_core_57_37b
Fetch_gff.pl -dbhost host -dbuser ensro -dbname homo_sapiens_core_58_37c -output_file homo_sapiens_core_58_37c_variants.gff
=head1 DESCRIPTION
......@@ -39,16 +39,17 @@ from an Ensembl core database.
here is an example commandline
./Fetch_gff.pl -dbhost host -dbuser user -dbname my_db -dbpass ****
./Fetch_gff.pl -dbhost host -dbuser user -dbname my_db -dbpass **** -output_file transcript_variants.gff
=head1 OPTIONS
-dbhost host name for database (gets put as host= in locator)
-dbname what name to connect to (dbname= in locator)
-dbuser what username to connect as (dbuser= in locator)
-dbpass what password to use (dbpass= in locator)
-chr which chromosome (optional)
-help displays this documentation with PERLDOC
-dbhost host name for database (gets put as host= in locator)
-dbname what name to connect to (dbname= in locator)
-dbuser what username to connect as (dbuser= in locator)
-dbpass what password to use (dbpass= in locator)
-chr which chromosome (optional)
-output_file|-o where the GFF output is written (optional, STDOUT by default)
-help displays this documentation with PERLDOC
=cut
......@@ -65,96 +66,110 @@ use Getopt::Long;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
{ # block to avoid namespace pollution
my $host = '';
my $port = '';
my $dbname = '';
my $dbuser = '';
my $dbpass = '';
my $chr = undef;
my $help;
my @coord_system;
&GetOptions(
'dbhost:s' => \$host,
'dbport:n' => \$port,
'dbname:s' => \$dbname,
'dbuser:s' => \$dbuser,
'chr:s' => \$chr,
'dbpass:s' => \$dbpass,
'h|help' => \$help,
) or ($help = 1);
if(!$host || !$dbuser || !$dbname || !$dbpass){
print STDERR "Can't get any information without database details\n";
print STDERR "-dbhost $host -dbuser $dbuser -dbname $dbname ".
" -dbpass $dbpass\n";
$help = 1;
}
if ($help) {
exec('perldoc', $0);
}
my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new
(-dbname => $dbname,
-host => $host,
-user => $dbuser,
-port => $port,
-pass => $dbpass);
my $gene_adaptor = $db->get_GeneAdaptor();
my @stable_gene_ids = undef;
my $size = 0;
if (defined($chr)) {
my $slice_adaptor = $db->get_SliceAdaptor();
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', $chr );
@stable_gene_ids = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
$size = scalar @stable_gene_ids;
print STDERR "Number of stable genes on region $chr:\t" . $size . "\n";
} else {
@stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
$size = scalar @stable_gene_ids;
print STDERR "Number of stable ids:\t" . $size . "\n";
}
for my $id (@stable_gene_ids) {
my $gene = ($chr) ? $id : $gene_adaptor->fetch_by_stable_id($id);
my $gene_id = $gene->display_id();
my $biotype = $gene->biotype();
my $chr = $gene->slice->seq_region_name();
my $strand = $gene->strand();
my $start = $gene->start();
my $end = $gene->end();
my @transcripts = @{$gene->get_all_Transcripts()};
for my $transcript (@transcripts) {
my $transcr_id = $transcript->display_id() ; ;
#Get the exons + print info.
my $exons = $transcript->get_all_Exons() ;
foreach my $exon (@$exons) {
my $exon_id = $exon->display_id() ;
my $exon_start = $exon->start() ;
my $exon_end = $exon->end() ;
my $exon_std = $exon->strand() ;
my $slice = $exon->slice->seq_region_name();
$exon_std =~ s/-1/-/ ;
$exon_std =~ s/1/+/ ;
print "$chr\tEnsembl\texon\t$exon_start\t$exon_end\t.\t$exon_std\t.\tgene_id \"$gene_id\"; transcript_id \"$transcr_id\"; exon_id \"$exon_id\"\n" ;
}
}
}
exit 0;
my $host = '';
my $port = '';
my $dbname = '';
my $dbuser = '';
my $dbpass = '';
my $chr = undef;
my $output_file = undef;
my $help;
my @coord_system;
&GetOptions(
'dbhost:s' => \$host,
'dbport:n' => \$port,
'dbname:s' => \$dbname,
'dbuser:s' => \$dbuser,
'dbpass:s' => \$dbpass,
'chr:s' => \$chr,
'output_file|o=s' => \$output_file,
'h|help' => \$help,
) or ($help = 1);
if(!$host || !$dbuser || !$dbname){
print STDERR "Can't get any information without database details\n";
print STDERR "-dbhost '$host' -dbuser '$dbuser' -dbname '$dbname' ".
" -dbpass '$dbpass'\n";
$help = 1;
}
if ($help) {
exec('perldoc', $0);
}
my $output_stream;
if (defined($output_file)) {
open ($output_stream, ">$output_file") || throw "Can't open '$output_file' file for writing\n";
} else {
$output_stream = \*STDOUT;
print STDERR "Will write GFF stream to the standard output.\n";
}
my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new
(-dbname => $dbname,
-host => $host,
-user => $dbuser,
-port => $port,
-pass => $dbpass);
my $gene_adaptor = $db->get_GeneAdaptor();
my @stable_gene_ids = undef;
my $size = 0;
if (defined($chr)) {
my $slice_adaptor = $db->get_SliceAdaptor();
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', $chr );
@stable_gene_ids = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
$size = scalar @stable_gene_ids;
print STDERR "Number of stable genes on region $chr:\t" . $size . "\n";
} else {
@stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
$size = scalar @stable_gene_ids;
print STDERR "Number of stable ids:\t" . $size . "\n";
}
for my $id (@stable_gene_ids) {
my $gene = ($chr) ? $id : $gene_adaptor->fetch_by_stable_id($id);
my $gene_id = $gene->display_id();
my $biotype = $gene->biotype();
my $chr = $gene->slice->seq_region_name();
my $strand = $gene->strand();
my $start = $gene->start();
my $end = $gene->end();
my @transcripts = @{$gene->get_all_Transcripts()};
for my $transcript (@transcripts) {
my $transcr_id = $transcript->display_id() ; ;
#Get the exons + print info.
my $exons = $transcript->get_all_Exons() ;
foreach my $exon (@$exons) {
my $exon_id = $exon->display_id() ;
my $exon_start = $exon->start() ;
my $exon_end = $exon->end() ;
my $exon_std = $exon->strand() ;
my $slice = $exon->slice->seq_region_name();
$exon_std =~ s/-1/-/ ;
$exon_std =~ s/1/+/ ;
print $output_stream "$chr\tEnsembl\texon\t$exon_start\t$exon_end\t.\t$exon_std\t.\tgene_id \"$gene_id\"; transcript_id \"$transcr_id\"; exon_id \"$exon_id\"\n" ;
}
}
}
exit 0;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment