Commit 890c9c66 authored by Amonida Zadissa
Browse files

Ignore transcripts with biotypes 'nonsense_mediated_decay' and

'processed_transcript', if gene biotype is protein_coding.
parent ecdb4009
......@@ -30,91 +30,99 @@ my $write = 0;
my $include_non_ref = 1;
my $verbose = 0;
&GetOptions(
'dbhost:s' => \$host,
'dbport:n' => \$port,
'dbname:s' => \$dbname,
'dbuser:s' => \$user,
'dbpass:s' => \$pass,
'coord_system_name:s' => \$coord_system,
'seq_region_name:s' => \$seq_region_name,
'write!' => \$write,
'include_non_ref!' => \$include_non_ref,
'verbose!' => \$verbose,
);
unless ( $write ) {
print " you've not used the -write option so results will not be written into the database\n" ;
}
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(
-host => $host,
-user => $user,
-port => $port,
-dbname => $dbname,
-pass => $pass,
);
# Parse the command-line options into variables declared outside this
# excerpt. 'write', 'include_non_ref' and 'verbose' are negatable flags
# (e.g. -nowrite); the db* and region options take an optional value.
#
# Stop on options that cannot be parsed: carrying on with defaults after a
# typo could run the script against the wrong database or region.
GetOptions(
    'dbhost:s'            => \$host,
    'dbport:n'            => \$port,
    'dbname:s'            => \$dbname,
    'dbuser:s'            => \$user,
    'dbpass:s'            => \$pass,
    'coord_system_name:s' => \$coord_system,
    'seq_region_name:s'   => \$seq_region_name,
    'write!'              => \$write,
    'include_non_ref!'    => \$include_non_ref,
    'verbose!'            => \$verbose,
) or die "Error while parsing command-line options\n";

# Tell the user up front that this run will not store anything.
unless ($write) {
    print
" you've not used the -write option so results will not be written into the database\n";
}

# Open the connection to the database whose genes are to be updated.
# Direct method-call syntax avoids the ambiguities of "new Class(...)".
my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host   => $host,
    -user   => $user,
    -port   => $port,
    -dbname => $dbname,
    -pass   => $pass,
);
my $sa = $db->get_SliceAdaptor;
my $slices;
if($seq_region_name){
my $slice = $sa->fetch_by_region($coord_system, $seq_region_name, $include_non_ref);
push(@$slices, $slice);
}else{
$slices = $sa->fetch_all($coord_system, '', $include_non_ref);
# Build the list of slices to process: either the single region named on the
# command line, or every region of the requested coordinate system.
if ($seq_region_name) {

    # The third positional argument of fetch_by_region() is the slice start,
    # not an "include non-reference" switch, so $include_non_ref must not be
    # passed here: with -noinclude_non_ref it would request start 0.
    my $slice = $sa->fetch_by_region( $coord_system, $seq_region_name );

    # Fail now, with a readable message, rather than later on a method call
    # on an undefined slice.
    die "Could not fetch a slice for seq_region '$seq_region_name'\n"
      unless ( defined $slice );

    push( @$slices, $slice );
} else {
    $slices = $sa->fetch_all( $coord_system, '', $include_non_ref );
}
# Clear the canonical transcript (and canonical annotation) of every gene
# before new values are assigned.
# This statement modifies the database, so it is only run when -write was
# given; otherwise a dry run would wipe the existing assignments while
# reporting that nothing is written.
# NOTE(review): the reset is not restricted to the selected seq_region, so a
# run limited with -seq_region_name still clears all other genes - confirm
# that this is intended.
my $update_to_null =
  "update gene set canonical_transcript_id = NULL, canonical_annotation = NULL";
$db->dbc->do($update_to_null) if ($write);

# If the call above does not work, try
# $db->dbc->db_handle->do($update_to_null) instead.

# Statement used further down to store the chosen transcript for one gene;
# the placeholders are (canonical_transcript_id, gene_id).
my $gene_update_sql =
  "update gene set canonical_transcript_id = ? where gene_id = ?";
my $sth = $db->dbc->prepare($gene_update_sql);
SLICE:foreach my $slice(@$slices){
print "Getting genes for ".$slice->name."\n" if($verbose);
my $genes = $slice->get_all_Genes(undef, undef, 1);
SLICE: foreach my $slice (@$slices) {
print "Getting genes for " . $slice->name . "\n" if ($verbose);
my $genes = $slice->get_all_Genes( undef, undef, 1 );
my %canonical;
GENE:foreach my $gene(@$genes){
print "Updating gene: ",$gene->dbID,"\n";
GENE: foreach my $gene (@$genes) {
print "Updating gene: ", $gene->dbID, "\n";
my $transcripts = $gene->get_all_Transcripts;
if(@$transcripts == 1){
$canonical{$gene->dbID} = $transcripts->[0]->dbID;
if ( @$transcripts == 1 ) {
$canonical{ $gene->dbID } = $transcripts->[0]->dbID;
next GENE;
}
my $has_translation =0;
my $count = 0;
my $has_translation = 0;
my $count = 0;
my @with_translation;
my @no_translation;
foreach my $transcript(@$transcripts){
if($transcript->translation && ($gene->biotype ne 'processed_transcript')
&& ($gene->biotype ne 'pseudogene')){
unless($transcript->translation->seq =~ /\*/){
push(@with_translation, $transcript);
}
}else{
push(@no_translation, $transcript);
# Split the gene's transcripts into two buckets: @with_translation holds the
# candidates that are later ranked by translation length, @no_translation
# those that are ranked by transcript length.
foreach my $candidate (@$transcripts) {

    # A transcript is ranked on its protein only if it has a translation and
    # neither the gene biotype nor the transcript biotype excludes it.
    my $treat_as_coding =
         $candidate->translation
      && ( $gene->biotype ne 'processed_transcript' )
      && ( $gene->biotype ne 'pseudogene' )
      && ( $candidate->biotype ne 'nonsense_mediated_decay' )
      && ( $candidate->biotype ne 'processed_transcript' );

    if ( !$treat_as_coding ) {
        push( @no_translation, $candidate );
    } else {

        # A translation containing '*' is not accepted; such a transcript
        # ends up in neither bucket.
        push( @with_translation, $candidate )
          if ( $candidate->translation->seq !~ /\*/ );
    }
}
my @sorted;
if(@with_translation){
if (@with_translation) {
my @len_and_trans;
foreach my $trans (@with_translation) {
my $h = { trans => $trans, len => $trans->translate->length };
push @len_and_trans,$h;
push @len_and_trans, $h;
}
my @tmp_sorted = sort { $b->{len} <=> $a->{len} } @len_and_trans;
foreach my $h (@tmp_sorted) {
#print "Adding to sorted " . $h->{trans}->dbID . "\n";
push @sorted,$h->{trans};
push @sorted, $h->{trans};
}
}else{
@sorted = sort {$b->length <=> $a->length} @no_translation;
} else {
@sorted = sort { $b->length <=> $a->length } @no_translation;
}
$canonical{$gene->dbID} = $sorted[0]->dbID;
}
foreach my $gene_id (keys(%canonical)){
# @sorted is empty when every transcript of the gene was accepted as coding
# but then dropped because its translation contains '*'. Report the gene and
# move on instead of dying on a method call on an undefined value.
unless (@sorted) {
    warn "No canonical transcript candidate found for gene "
      . $gene->dbID
      . ", skipping\n";
    next GENE;
}

# The first element is the longest candidate (by translation length when
# translated candidates exist, by transcript length otherwise).
$canonical{ $gene->dbID } = $sorted[0]->dbID;
} ## end foreach my $gene (@$genes)
foreach my $gene_id ( keys(%canonical) ) {
my $transcript_id = $canonical{$gene_id};
$sth->execute($transcript_id, $gene_id) if($write);
$sth->execute( $transcript_id, $gene_id ) if ($write);
}
}
} ## end foreach my $slice (@$slices)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment