emit_canonical_encodings.pl 5.27 KB
Newer Older
1
#!/usr/bin/env perl
Magali Ruffier's avatar
Magali Ruffier committed
2
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
3
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
4 5 6 7 8 9 10 11 12 13 14 15 16
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107

use strict;
use warnings;

use Getopt::Long;

use Bio::EnsEMBL::Utils::Exception;
use Bio::EnsEMBL::Utils::TranscriptSelector;
use Bio::EnsEMBL::DBSQL::DBAdaptor;

# Connection settings for the main database (-dbhost, -dbport, ...).
my ($host, $port, $dbname, $user, $pass);
# Connection settings for the optional DNA database (-dnahost, -dnaport, ...).
my ($dnahost, $dnaport, $dnadbname, $dnauser, $dnapass);
# Connection settings for the optional CCDS database (-ccdshost, ...).
my ($ccds_host, $ccds_dbname, $ccds_user, $ccds_port, $ccds_pass);

# Search parameters: a stable ID given either as a transcript or as a gene.
my $transcript;
my $gene;

# Set when the caller asks for the usage text with -help / -h.
my $help;

# Parse the command line. GetOptions returns false when it meets an unknown
# or malformed option; in that case show the usage text and stop instead of
# carrying on with half-populated settings.
GetOptions( 'dbhost:s'            => \$host,
            'dbport:n'            => \$port,
            'dbname:s'            => \$dbname,
            'dbuser:s'            => \$user,
            'dbpass:s'            => \$pass,
            'dnahost:s'           => \$dnahost,
            'dnadbname:s'         => \$dnadbname,
            'dnauser:s'           => \$dnauser,
            'dnapass:s'           => \$dnapass,
            'dnaport:s'           => \$dnaport,
            'ccdshost:s'          => \$ccds_host,
            'ccdsdbname:s'        => \$ccds_dbname,
            'ccdsuser:s'          => \$ccds_user,
            'ccdsport:s'          => \$ccds_port,
            'ccdspass:s'          => \$ccds_pass,
            'transcript:s'        => \$transcript,
            'gene:s'              => \$gene,
            'help|h'              => \$help,
            )
  or do {
    usage();
    exit 1;
  };

# An explicit request for help is not an error, hence the zero exit status.
if ($help) {
  usage();
  exit 0;
}


# Adaptor for the main database, built from the -db* command-line options.
# Constructors are called as Class->new(...) rather than with the indirect
# object form "new Class(...)", which Perl can mis-parse.
my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
  -host    => $host,
  -user    => $user,
  -port    => $port,
  -dbname  => $dbname,
  -pass    => $pass,
  -species => 'default',
);

# A separate DNA database is optional; it is only attached when -dnadbname
# was given, and then a user and host are mandatory as well.
if ($dnadbname) {
  if (!$dnauser || !$dnahost) {
    throw("You must provide user, host and dbname details to connect to DNA DB!");
  }
  my $dna_db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host    => $dnahost,
    -user    => $dnauser,
    -port    => $dnaport,
    -dbname  => $dnadbname,
    -pass    => $dnapass,
    -species => 'dna_'.$dba->species(),
  );
  $dba->dnadb($dna_db);
}

# Adaptor for the optional CCDS database; stays undefined unless -ccdsdbname
# was given.
my $ccds_dba;

if ($ccds_dbname) {
  if (!$ccds_user || !$ccds_host) {
    throw("You must provide user, host and dbname details to connect to CCDS DB!");
  }
  $ccds_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host    => $ccds_host,
    -user    => $ccds_user,
    -port    => $ccds_port,
    -pass    => $ccds_pass,
    -dbname  => $ccds_dbname,
    -species => 'ccds_'.$dba->species(),
  );
}

# Without a gene or a transcript stable ID there is nothing to look up, so
# stop with a clear message before doing any further work.
if (!$gene && !$transcript) {
  throw("You must provide either a -gene or a -transcript stable ID!");
}

# $ccds_dba may be undefined when no CCDS database was configured.
# NOTE(review): the second argument is presumably a verbosity flag; confirm
# against the TranscriptSelector documentation.
my $transcript_selector = Bio::EnsEMBL::Utils::TranscriptSelector->new($ccds_dba, 1);

# The gene whose transcripts are examined; resolved directly from -gene or
# indirectly through -transcript (-gene wins when both are given).
my $gene_object;

if ($gene) {
  printf 'Using "%s" as our Gene stable ID'."\n", $gene;
  $gene_object = $dba->get_GeneAdaptor()->fetch_by_stable_id($gene);
  if (!defined $gene_object) {
    throw("Could not find a Gene with stable ID '$gene'");
  }
}
elsif ($transcript) {
  printf 'Using "%s" as a Transcript to find a Gene'."\n", $transcript;
  my $t = $dba->get_TranscriptAdaptor()->fetch_by_stable_id($transcript);
  if (!defined $t) {
    throw("Could not find a Transcript with stable ID '$transcript'");
  }
  $gene_object = $t->get_Gene();
  if (!defined $gene_object) {
    throw("Could not find the Gene for Transcript '$transcript'");
  }
  printf 'Using "%s" as our Gene'."\n", $gene_object->stable_id();
}

# The selector's return value is not used here; afterwards the stable ID of
# the canonical transcript held by the gene object is reported.
$transcript_selector->select_canonical_transcript_for_Gene($gene_object);
print "Original: ".$gene_object->canonical_transcript->stable_id."\n";
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161

# Print the command-line help text to STDOUT.
# Takes no arguments and returns nothing. The option names listed here must
# match the ones registered with GetOptions at the top of the script.
sub usage {
  print <<'END_USAGE';

Example usage: perl emit_canonical_encodings.pl -dbhost host -dbuser user
     -dbpass *** -dbname dbname -dbport 3306 -transcript ENST00019112

Example usage: perl emit_canonical_encodings.pl -dbhost host -dbuser user
     -dbpass *** -dbname dbname -dbport 3306 -gene ENSG00019111

Script options:

    -dbname       Database name

    -dbhost       Database host

    -dbport       Database port

    -dbuser       Database user

    -dbpass       Database password

Optional DB connection arguments:

    -dnadbname    DNA Database name

    -dnahost      DNA Database host

    -dnauser      DNA Database user

    -dnaport      DNA Database port

    -dnapass      DNA Database pass

    -ccdsdbname  CCDS database name

    -ccdshost    CCDS database host

    -ccdsuser    CCDS database user

    -ccdspass    CCDS database pass

    -ccdsport    CCDS database port

Search params:

    -transcript    The transcript stable ID to use to find the gene in question

    -gene          The gene stable ID to emit encoded transcripts for

A warning about not using CCDS is perfectly acceptable when not running on
Human, Mouse and Zebrafish.
END_USAGE

  return;
}