Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
890c9c66
Commit
890c9c66
authored
Jan 14, 2010
by
Amonida Zadissa
Browse files
Ignore transcripts with biotypes 'nonsense_mediated_decay' and
'processed_transcript', if gene biotype is protein_coding.
parent
ecdb4009
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
64 additions
and
56 deletions
+64
-56
misc-scripts/canonical_transcripts/set_canonical_transcripts.pl
...cripts/canonical_transcripts/set_canonical_transcripts.pl
+64
-56
No files found.
misc-scripts/canonical_transcripts/set_canonical_transcripts.pl
View file @
890c9c66
...
...
@@ -30,91 +30,99 @@ my $write = 0;
my
$include_non_ref
=
1
;
my
$verbose
=
0
;
&GetOptions
(
'
dbhost:s
'
=>
\
$host
,
'
dbport:n
'
=>
\
$port
,
'
dbname:s
'
=>
\
$dbname
,
'
dbuser:s
'
=>
\
$user
,
'
dbpass:s
'
=>
\
$pass
,
'
coord_system_name:s
'
=>
\
$coord_system
,
'
seq_region_name:s
'
=>
\
$seq_region_name
,
'
write!
'
=>
\
$write
,
'
include_non_ref!
'
=>
\
$include_non_ref
,
'
verbose!
'
=>
\
$verbose
,
);
unless
(
$write
)
{
print
"
you've not used the -write option so results will not be written into the database
\n
"
;
}
my
$db
=
new
Bio::EnsEMBL::DBSQL::
DBAdaptor
(
-
host
=>
$host
,
-
user
=>
$user
,
-
port
=>
$port
,
-
dbname
=>
$dbname
,
-
pass
=>
$pass
,
);
&GetOptions
(
'
dbhost:s
'
=>
\
$host
,
'
dbport:n
'
=>
\
$port
,
'
dbname:s
'
=>
\
$dbname
,
'
dbuser:s
'
=>
\
$user
,
'
dbpass:s
'
=>
\
$pass
,
'
coord_system_name:s
'
=>
\
$coord_system
,
'
seq_region_name:s
'
=>
\
$seq_region_name
,
'
write!
'
=>
\
$write
,
'
include_non_ref!
'
=>
\
$include_non_ref
,
'
verbose!
'
=>
\
$verbose
,
);
unless
(
$write
)
{
print
"
you've not used the -write option so results will not be written into the database
\n
";
}
my
$db
=
new
Bio::EnsEMBL::DBSQL::
DBAdaptor
(
-
host
=>
$host
,
-
user
=>
$user
,
-
port
=>
$port
,
-
dbname
=>
$dbname
,
-
pass
=>
$pass
,
);
my
$sa
=
$db
->
get_SliceAdaptor
;
my
$slices
;
if
(
$seq_region_name
){
my
$slice
=
$sa
->
fetch_by_region
(
$coord_system
,
$seq_region_name
,
$include_non_ref
);
push
(
@$slices
,
$slice
);
}
else
{
$slices
=
$sa
->
fetch_all
(
$coord_system
,
'',
$include_non_ref
);
if
(
$seq_region_name
)
{
my
$slice
=
$sa
->
fetch_by_region
(
$coord_system
,
$seq_region_name
,
$include_non_ref
);
push
(
@$slices
,
$slice
);
}
else
{
$slices
=
$sa
->
fetch_all
(
$coord_system
,
'',
$include_non_ref
);
}
my
$update_to_null
=
"
update gene set canonical_transcript_id = NULL, canonical_annotation = NULL
";
$db
->
dbc
->
do
(
$update_to_null
);
# get $db->dbc->db_handle->do($update_to_null) instead if above not working
my
$gene_update_sql
=
"
update gene set canonical_transcript_id = ? where gene_id = ?
";
my
$sth
=
$db
->
dbc
->
prepare
(
$gene_update_sql
);
SLICE:foreach
my
$slice
(
@$slices
){
print
"
Getting genes for
"
.
$slice
->
name
.
"
\n
"
if
(
$verbose
);
my
$genes
=
$slice
->
get_all_Genes
(
undef
,
undef
,
1
);
SLICE:
foreach
my
$slice
(
@$slices
)
{
print
"
Getting genes for
"
.
$slice
->
name
.
"
\n
"
if
(
$verbose
);
my
$genes
=
$slice
->
get_all_Genes
(
undef
,
undef
,
1
);
my
%canonical
;
GENE:foreach
my
$gene
(
@$genes
){
print
"
Updating gene:
",
$gene
->
dbID
,"
\n
";
GENE:
foreach
my
$gene
(
@$genes
)
{
print
"
Updating gene:
",
$gene
->
dbID
,
"
\n
";
my
$transcripts
=
$gene
->
get_all_Transcripts
;
if
(
@$transcripts
==
1
)
{
$canonical
{
$gene
->
dbID
}
=
$transcripts
->
[
0
]
->
dbID
;
if
(
@$transcripts
==
1
)
{
$canonical
{
$gene
->
dbID
}
=
$transcripts
->
[
0
]
->
dbID
;
next
GENE
;
}
my
$has_translation
=
0
;
my
$count
=
0
;
my
$has_translation
=
0
;
my
$count
=
0
;
my
@with_translation
;
my
@no_translation
;
foreach
my
$transcript
(
@$transcripts
){
if
(
$transcript
->
translation
&&
(
$gene
->
biotype
ne
'
processed_transcript
')
&&
(
$gene
->
biotype
ne
'
pseudogene
')){
unless
(
$transcript
->
translation
->
seq
=~
/\*/
){
push
(
@with_translation
,
$transcript
);
}
}
else
{
push
(
@no_translation
,
$transcript
);
foreach
my
$transcript
(
@$transcripts
)
{
if
(
$transcript
->
translation
&&
(
$gene
->
biotype
ne
'
processed_transcript
'
)
&&
(
$gene
->
biotype
ne
'
pseudogene
'
)
&&
(
$transcript
->
biotype
ne
'
nonsense_mediated_decay
'
)
&&
(
$transcript
->
biotype
ne
'
processed_transcript
'
)
)
{
unless
(
$transcript
->
translation
->
seq
=~
/\*/
)
{
push
(
@with_translation
,
$transcript
);
}
}
else
{
push
(
@no_translation
,
$transcript
);
}
}
my
@sorted
;
if
(
@with_translation
){
if
(
@with_translation
)
{
my
@len_and_trans
;
foreach
my
$trans
(
@with_translation
)
{
my
$h
=
{
trans
=>
$trans
,
len
=>
$trans
->
translate
->
length
};
push
@len_and_trans
,
$h
;
push
@len_and_trans
,
$h
;
}
my
@tmp_sorted
=
sort
{
$b
->
{
len
}
<=>
$a
->
{
len
}
}
@len_and_trans
;
foreach
my
$h
(
@tmp_sorted
)
{
#print "Adding to sorted " . $h->{trans}->dbID . "\n";
push
@sorted
,
$h
->
{
trans
};
push
@sorted
,
$h
->
{
trans
};
}
}
else
{
@sorted
=
sort
{
$b
->
length
<=>
$a
->
length
}
@no_translation
;
}
else
{
@sorted
=
sort
{
$b
->
length
<=>
$a
->
length
}
@no_translation
;
}
$canonical
{
$gene
->
dbID
}
=
$sorted
[
0
]
->
dbID
;
}
foreach
my
$gene_id
(
keys
(
%canonical
)
)
{
$canonical
{
$gene
->
dbID
}
=
$sorted
[
0
]
->
dbID
;
}
## end foreach my $gene (@$genes)
foreach
my
$gene_id
(
keys
(
%canonical
)
)
{
my
$transcript_id
=
$canonical
{
$gene_id
};
$sth
->
execute
(
$transcript_id
,
$gene_id
)
if
(
$write
);
$sth
->
execute
(
$transcript_id
,
$gene_id
)
if
(
$write
);
}
}
}
## end foreach my $slice (@$slices)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment