Commit aeb6dd59 authored by Gautier Koscielny
Browse files

Option output_file|o added to store the GFF output stream in a specific file.

parent 672ce2c5
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
=head1 SYNOPSIS =head1 SYNOPSIS
Fetch_gff.pl -dbhost host -dbuser ensro -dbname homo_sapiens_core_57_37b Fetch_gff.pl -dbhost host -dbuser ensro -dbname homo_sapiens_core_58_37c -output_file homo_sapiens_core_58_37c_variants.gff
=head1 DESCRIPTION =head1 DESCRIPTION
...@@ -39,16 +39,17 @@ from an Ensembl core database. ...@@ -39,16 +39,17 @@ from an Ensembl core database.
here is an example commandline here is an example commandline
./Fetch_gff.pl -dbhost host -dbuser user -dbname my_db -dbpass **** ./Fetch_gff.pl -dbhost host -dbuser user -dbname my_db -dbpass **** -output_file transcript_variants.gff
=head1 OPTIONS =head1 OPTIONS
-dbhost host name for database (gets put as host= in locator) -dbhost host name for database (gets put as host= in locator)
-dbname what name to connect to (dbname= in locator) -dbname what name to connect to (dbname= in locator)
-dbuser what username to connect as (dbuser= in locator) -dbuser what username to connect as (dbuser= in locator)
-dbpass what password to use (dbpass= in locator) -dbpass what password to use (dbpass= in locator)
-chr which chromosome (optional) -chr which chromosome (optional)
-help displays this documentation with PERLDOC -output_file|-o where the GFF output is written (optional, STDOUT by default)
-help displays this documentation with PERLDOC
=cut =cut
...@@ -65,96 +66,110 @@ use Getopt::Long; ...@@ -65,96 +66,110 @@ use Getopt::Long;
use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Exception qw(throw warning);
{ # block to avoid namespace pollution

    # Write one GFF "exon" line per exon of every transcript of every gene
    # found in an Ensembl core database.
    #
    # Options (parsed with Getopt::Long):
    #   -dbhost, -dbname, -dbuser   required connection details
    #   -dbport, -dbpass            optional connection details
    #   -chr                        restrict the dump to one chromosome
    #   -output_file | -o           file receiving the GFF lines (STDOUT if absent)
    #   -h | -help                  show the POD through perldoc
    #
    # Progress and diagnostic messages go to STDERR so that they never mix
    # with the GFF stream.

    my $host   = '';
    my $port   = '';
    my $dbname = '';
    my $dbuser = '';
    my $dbpass = '';
    my $chr;
    my $output_file;
    my $help;

    GetOptions(
        'dbhost:s'        => \$host,
        'dbport:n'        => \$port,
        'dbname:s'        => \$dbname,
        'dbuser:s'        => \$dbuser,
        'dbpass:s'        => \$dbpass,
        'chr:s'           => \$chr,
        'output_file|o=s' => \$output_file,
        'h|help'          => \$help,
    ) or ($help = 1);

    if (!$host || !$dbuser || !$dbname) {
        # Report what was received, but never echo the password itself.
        my $dbpass_state = length($dbpass) ? '(set)' : '(not set)';
        print STDERR "Can't get any information without database details\n";
        print STDERR "-dbhost '$host' -dbuser '$dbuser' -dbname '$dbname' "
                   . " -dbpass $dbpass_state\n";
        $help = 1;
    }

    if ($help) {
        # exec only returns when it failed to start perldoc.
        exec('perldoc', $0)
            or die "Can't run perldoc on '$0': $!\n";
    }

    # Choose the destination of the GFF stream before touching the database.
    my $output_stream;
    if (defined $output_file) {
        # Three-argument open: the file name can never be read as a mode.
        open($output_stream, '>', $output_file)
            or throw("Can't open '$output_file' file for writing: $!\n");
    }
    else {
        $output_stream = \*STDOUT;
        print STDERR "Will write GFF stream to the standard output.\n";
    }

    my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
        -dbname => $dbname,
        -host   => $host,
        -user   => $dbuser,
        -port   => $port,
        -pass   => $dbpass,
    );

    my $gene_adaptor = $db->get_GeneAdaptor();

    # One single test decides both how the list is fetched and how each of its
    # elements is interpreted in the loop below.
    my $by_region = defined $chr;

    # Holds Gene objects when -chr was given, gene stable ids otherwise.
    my @genes_or_ids;

    if ($by_region) {
        my $slice_adaptor = $db->get_SliceAdaptor();
        my $slice = $slice_adaptor->fetch_by_region('chromosome', $chr);

        # NOTE(review): presumably fetch_by_region yields nothing for an
        # unknown region -- confirm against the Ensembl API documentation.
        throw("Can't fetch chromosome '$chr' from '$dbname'\n")
            unless $slice;

        @genes_or_ids = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
        print STDERR "Number of stable genes on region $chr:\t"
                   . scalar(@genes_or_ids) . "\n";
    }
    else {
        @genes_or_ids = @{ $gene_adaptor->list_stable_ids() };
        print STDERR "Number of stable ids:\t" . scalar(@genes_or_ids) . "\n";
    }

    # Strand values as returned by the exon => GFF strand column.
    # Any other value is printed unchanged.
    my %gff_strand_for = (
        '1'  => '+',
        '-1' => '-',
    );

    for my $item (@genes_or_ids) {
        my $gene = $by_region
                 ? $item
                 : $gene_adaptor->fetch_by_stable_id($item);

        my $gene_id    = $gene->display_id();
        my $seq_region = $gene->slice->seq_region_name();

        for my $transcript (@{ $gene->get_all_Transcripts() }) {
            my $transcr_id = $transcript->display_id();

            # Get the exons + print info.
            for my $exon (@{ $transcript->get_all_Exons() }) {
                my $exon_id    = $exon->display_id();
                my $exon_start = $exon->start();
                my $exon_end   = $exon->end();
                my $strand     = $exon->strand();
                my $exon_std   = exists $gff_strand_for{$strand}
                               ? $gff_strand_for{$strand}
                               : $strand;

                print {$output_stream} "$seq_region\tEnsembl\texon\t$exon_start\t$exon_end\t.\t$exon_std\t.\tgene_id \"$gene_id\"; transcript_id \"$transcr_id\"; exon_id \"$exon_id\"\n";
            }
        }
    }

    # Buffered write errors (e.g. a full disk) only surface at close time.
    if (defined $output_file) {
        close($output_stream)
            or throw("Error while closing '$output_file': $!\n");
    }

    exit 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment