Skip to content
Snippets Groups Projects
Commit 5f76c066 authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Mostly reformatting.

parent 29305b1d
No related branches found
No related tags found
No related merge requests found
......@@ -255,13 +255,14 @@ sub build_cache_all {
=cut
sub build_cache_from_genes {
my $self = shift;
my $type = shift;
my $genes = shift;
my $self = shift;
my $type = shift;
my $genes = shift;
my $need_project = shift;
throw("You must provide a type.") unless $type;
throw("You must provide a listref of genes.") unless (ref($genes) eq 'ARRAY');
throw("You must provide a listref of genes.")
unless ( ref($genes) eq 'ARRAY' );
# biotype filter
if ( $self->conf->param('biotypes') ) {
......@@ -276,139 +277,130 @@ sub build_cache_from_genes {
#my $num_genes = scalar(@$genes);
#my $progress_id = $self->logger->init_progress($num_genes);
# loop over genes sorted by gene location.
# the sort will hopefully improve assembly mapper cache performance and
# therefore speed up exon sequence retrieval
foreach my $gene (sort { $a->start <=> $b->start } @$genes) {
# loop over genes sorted by gene location.
# the sort will hopefully improve assembly mapper cache performance and
# therefore speed up exon sequence retrieval
foreach my $gene ( sort { $a->start <=> $b->start } @$genes ) {
#$self->logger->log_progressbar($progress_id, ++$i, 2);
#$self->logger->log_progress($num_genes, ++$i, 20, 3, 1);
if ($need_project eq 'CHECK') {
if ( $need_project eq 'CHECK' ) {
# find out whether native coord_system is a common coord_system.
# if so, you don't need to project.
# also don't project if no common coord_system present
if ($self->highest_common_cs) {
my $csid = join(':', $gene->slice->coord_system_name,
$gene->slice->coord_system->version);
if ($self->is_common_cs($csid)) {
if ( $self->highest_common_cs ) {
my $csid = join( ':',
$gene->slice->coord_system_name,
$gene->slice->coord_system->version );
if ( $self->is_common_cs($csid) ) {
$need_project = 0;
}
} else {
}
else {
$need_project = 0;
}
}
# create lightweigt gene
my $lgene = Bio::EnsEMBL::IdMapping::TinyGene->new_fast([
$gene->dbID,
$gene->stable_id,
$gene->version,
$gene->created_date,
$gene->modified_date,
$gene->start,
$gene->end,
$gene->strand,
$gene->slice->seq_region_name,
$gene->biotype,
$gene->status,
$gene->analysis->logic_name,
($gene->is_known ? 1 : 0),
]);
my $lgene =
Bio::EnsEMBL::IdMapping::TinyGene->new_fast( [
$gene->dbID, $gene->stable_id,
$gene->version, $gene->created_date,
$gene->modified_date, $gene->start,
$gene->end, $gene->strand,
$gene->slice->seq_region_name, $gene->biotype,
$gene->status, $gene->analysis->logic_name,
( $gene->is_known ? 1 : 0 ), ] );
# build gene caches
$self->add('genes_by_id', $type, $gene->dbID, $lgene);
$self->add( 'genes_by_id', $type, $gene->dbID, $lgene );
# transcripts
foreach my $tr (@{ $gene->get_all_Transcripts }) {
my $ltr = Bio::EnsEMBL::IdMapping::TinyTranscript->new_fast([
$tr->dbID,
$tr->stable_id,
$tr->version,
$tr->created_date,
$tr->modified_date,
$tr->start,
$tr->end,
$tr->strand,
$tr->length,
md5_hex($tr->spliced_seq),
($tr->is_known ? 1 : 0),
]);
foreach my $tr ( @{ $gene->get_all_Transcripts } ) {
my $ltr =
Bio::EnsEMBL::IdMapping::TinyTranscript->new_fast( [
$tr->dbID, $tr->stable_id,
$tr->version, $tr->created_date,
$tr->modified_date, $tr->start,
$tr->end, $tr->strand,
$tr->length, md5_hex( $tr->spliced_seq ),
( $tr->is_known ? 1 : 0 ), ] );
$lgene->add_Transcript($ltr);
# build transcript caches
$self->add('transcripts_by_id', $type, $tr->dbID, $ltr);
$self->add('genes_by_transcript_id', $type, $tr->dbID, $lgene);
$self->add( 'transcripts_by_id', $type, $tr->dbID, $ltr );
$self->add( 'genes_by_transcript_id', $type, $tr->dbID, $lgene );
# translation (if there is one)
if (my $tl = $tr->translation) {
my $ltl = Bio::EnsEMBL::IdMapping::TinyTranslation->new_fast([
$tl->dbID,
$tl->stable_id,
$tl->version,
$tl->created_date,
$tl->modified_date,
$tr->dbID,
$tr->translate->seq,
($tr->is_known ? 1 : 0),
]);
if ( my $tl = $tr->translation ) {
my $ltl =
Bio::EnsEMBL::IdMapping::TinyTranslation->new_fast( [
$tl->dbID, $tl->stable_id,
$tl->version, $tl->created_date,
$tl->modified_date, $tr->dbID,
$tr->translate->seq, ( $tr->is_known ? 1 : 0 ),
] );
$ltr->add_Translation($ltl);
$self->add('translations_by_id', $type, $tl->dbID, $ltl);
$self->add( 'translations_by_id', $type, $tl->dbID, $ltl );
undef $tl;
}
# exons
foreach my $exon (@{ $tr->get_all_Exons }) {
my $lexon = Bio::EnsEMBL::IdMapping::TinyExon->new_fast([
$exon->dbID,
$exon->stable_id,
$exon->version,
$exon->created_date,
$exon->modified_date,
$exon->start,
$exon->end,
$exon->strand,
$exon->slice->seq_region_name,
$exon->slice->coord_system_name,
$exon->slice->coord_system->version,
$exon->slice->subseq($exon->start, $exon->end, $exon->strand),
$exon->phase,
$need_project,
]);
foreach my $exon ( @{ $tr->get_all_Exons } ) {
my $lexon =
Bio::EnsEMBL::IdMapping::TinyExon->new_fast( [
$exon->dbID,
$exon->stable_id,
$exon->version,
$exon->created_date,
$exon->modified_date,
$exon->start,
$exon->end,
$exon->strand,
$exon->slice->seq_region_name,
$exon->slice->coord_system_name,
$exon->slice->coord_system->version,
$exon->slice->subseq( $exon->start, $exon->end,
$exon->strand ),
$exon->phase,
$need_project, ] );
# get coordinates in common coordinate system if needed
if ($need_project) {
my @seg = @{ $exon->project($self->highest_common_cs,
$self->highest_common_cs_version) };
my @seg = @{
$exon->project( $self->highest_common_cs,
$self->highest_common_cs_version ) };
if (scalar(@seg) == 1) {
if ( scalar(@seg) == 1 ) {
my $sl = $seg[0]->to_Slice;
$lexon->common_start($sl->start);
$lexon->common_end($sl->end);
$lexon->common_strand($sl->strand);
$lexon->common_sr_name($sl->seq_region_name);
$lexon->common_start( $sl->start );
$lexon->common_end( $sl->end );
$lexon->common_strand( $sl->strand );
$lexon->common_sr_name( $sl->seq_region_name );
}
}
$ltr->add_Exon($lexon);
$self->add('exons_by_id', $type, $exon->dbID, $lexon);
$self->add_list('transcripts_by_exon_id', $type, $exon->dbID, $ltr);
$self->add( 'exons_by_id', $type, $exon->dbID, $lexon );
$self->add_list( 'transcripts_by_exon_id',
$type, $exon->dbID, $ltr );
undef $exon;
}
} ## end foreach my $exon ( @{ $tr->get_all_Exons...})
undef $tr;
}
} ## end foreach my $tr ( @{ $gene->get_all_Transcripts...})
undef $gene;
}
} ## end foreach my $gene ( sort { $a...})
return $num_genes;
}
} ## end sub build_cache_from_genes
=head2 filter_biotypes
......@@ -565,40 +557,40 @@ sub find_common_coord_systems {
# get adaptors for source db
my $s_dba = $self->get_DBAdaptor('source');
my $s_csa = $s_dba->get_CoordSystemAdaptor;
my $s_sa = $s_dba->get_SliceAdaptor;
my $s_sa = $s_dba->get_SliceAdaptor;
# get adaptors for target db
my $t_dba = $self->get_DBAdaptor('target');
my $t_csa = $t_dba->get_CoordSystemAdaptor;
my $t_sa = $t_dba->get_SliceAdaptor;
my $t_sa = $t_dba->get_SliceAdaptor;
# find common coord_systems
my @s_coord_systems = @{ $s_csa->fetch_all };
my @t_coord_systems = @{ $t_csa->fetch_all };
my $found_highest = 0;
my $found_highest = 0;
SOURCE:
SOURCE:
foreach my $s_cs (@s_coord_systems) {
if ( !$s_cs->is_default() ) { next SOURCE }
TARGET:
TARGET:
foreach my $t_cs (@t_coord_systems) {
if ( !$t_cs->is_default() ) { next TARGET }
if ($s_cs->name eq $t_cs->name) {
if ( $s_cs->name eq $t_cs->name ) {
# test for identical coord_system version
if ($s_cs->version and ($s_cs->version ne $t_cs->version)) {
if ( $s_cs->version and ( $s_cs->version ne $t_cs->version ) ) {
next TARGET;
}
# test for at least 50% identical seq_regions
if ($self->seq_regions_compatible($s_cs, $s_sa, $t_sa)) {
if ( $self->seq_regions_compatible( $s_cs, $s_sa, $t_sa ) ) {
$self->add_common_cs($s_cs);
unless ($found_highest) {
$self->highest_common_cs($s_cs->name);
$self->highest_common_cs_version($s_cs->version);
$self->highest_common_cs( $s_cs->name );
$self->highest_common_cs_version( $s_cs->version );
}
$found_highest = 1;
......@@ -606,11 +598,11 @@ sub find_common_coord_systems {
next SOURCE;
}
}
}
}
} ## end foreach my $t_cs (@t_coord_systems)
} ## end foreach my $s_cs (@s_coord_systems)
return $found_highest;
}
} ## end sub find_common_coord_systems
sub seq_regions_compatible {
......
......@@ -352,48 +352,55 @@ sub compare_exon_containers {
# Score of at least 0.5 are added to the exon scoring matrix.
#
sub calc_overlap_score {
my $self = shift;
my $self = shift;
my $source_exon = shift;
my $target_exon = shift;
my $matrix = shift;
my $matrix = shift;
my ($start, $end);
my ( $start, $end );
# don't score if exons on different strand
return unless ($source_exon->strand == $target_exon->strand);
return unless ( $source_exon->strand == $target_exon->strand );
# determine overlap start
if ($source_exon->start > $target_exon->start) {
if ( $source_exon->start > $target_exon->start ) {
$start = $source_exon->start;
} else {
}
else {
$start = $target_exon->start;
}
# determine overlap end
if ($source_exon->end < $target_exon->end) {
if ( $source_exon->end < $target_exon->end ) {
$end = $source_exon->end;
} else {
}
else {
$end = $target_exon->end;
}
#
# calculate score, which is defined as average overlap / exon length ratio
# Calculate score, which is defined as average overlap / exon length
# ratio.
#
my $overlap = $end - $start + 1;
my $overlap = $end - $start + 1;
my $source_length = $source_exon->end - $source_exon->start + 1;
my $target_length = $target_exon->end - $target_exon->start + 1;
my $score = ($overlap/$source_length + $overlap/$target_length)/2;
my $score = ( $overlap/$source_length + $overlap/$target_length )/2;
# PENALTY:
# penalise by 10% if phase if different
$score *= 0.9 if ($source_exon->phase != $target_exon->phase);
if ( $source_exon->phase != $target_exon->phase ) {
$score *= 0.9;
}
# add score to scoring matrix if it's at least 0.5
if ($score >= 0.5) {
$matrix->add_score($source_exon->id, $target_exon->id, $score);
if ( $score >= 0.5 ) {
$matrix->add_score( $source_exon->id, $target_exon->id, $score );
}
}
} ## end sub calc_overlap_score
sub run_exonerate {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment