Commit 12a6ff12 authored by Emmanuel Mongin's avatar Emmanuel Mongin
Browse files

don'remember codded in the plane few days ago...

parent 4652a7d4
......@@ -585,7 +585,7 @@ sub get_Introns{
if ($previous_ex_end != 0) {
my $intron_start = $previous_ex_end;
my $intron_end = $ex_start;
my $feat1 = new Bio::EnsEMBL::SeqFeature ( -seqname => $protid,
-start => $starts->[$count],
-end => $starts->[$count],
......@@ -642,8 +642,12 @@ sub get_exon_global_coordinates{
$sth->execute;
my @row = $sth->fetchrow;
my $start = $row[0];
my $end = $row[1];
return ($start,$end);
}
......@@ -659,121 +663,70 @@ sub get_exon_global_coordinates{
=cut
sub get_snps {
my ($self,$protein) = @_;
my $db = $self->db;
my $gene;
my $transcript;
my @ex_snps;
my $count = 0;
my @array_features;
my ($self,$protein) = @_;
my $db = $self->db;
my $gene;
my $transcript;
my @ex_snps;
my $count = 0;
my @array_features;
#Get the transcript, gene accession numbers for the given peptide
my $transid = $protein->transcriptac();
my $geneid = $protein->geneac();
my $protid = $protein->id();
my $transid = $protein->transcriptac();
my $geneid = $protein->geneac();
my $protid = $protein->id();
#For now a static golden path is built (this is not the fastest solution but the easiest one). Should be changed to direct sql statements.
$db->add_ExternalFeatureFactory($self->snp_obj());
$db->static_golden_path_type('UCSC');
my $virtual = $db->get_StaticGoldenPathAdaptor();
$db->add_ExternalFeatureFactory($self->snp_obj());
$db->static_golden_path_type('UCSC');
my $virtual = $db->get_StaticGoldenPathAdaptor();
#Fetch a virtual contig for the given transcript. All of the external are retrieved for this virtual contig as well as all genes.
my $vc = $virtual->fetch_VirtualContig_of_transcript($transid,0);
my @snips = $vc->get_all_ExternalFeatures();
my @genes = $vc->get_all_Genes();
my $vc = $virtual->fetch_VirtualContig_of_transcript($transid,0);
my @snips = $vc->get_all_ExternalFeatures();
my @genes = $vc->get_all_Genes();
#Get which virtual transcript holds the transcript we want to study
foreach my $gen (@genes) {
if ($gen->id eq $geneid) {
$gene = $gen;
}
}
my @transcripts = $gene->each_Transcript();
foreach my $trans (@transcripts) {
if ($trans->id eq $transid) {
$transcript = $trans;
}
}
my $genesnp = new Bio::EnsEMBL::ExternalData::GeneSNP
(-gene => $gene,
-contig => $vc
);
$genesnp->transcript($transcript);
my @seq_diff = $genesnp->snps2transcript(@snips);
foreach my $diff (@seq_diff) {
foreach my $var ($diff->each_Variant) {
if($var->isa('Bio::Variation::AAChange') ) {
print STDERR $var->label, "\n";
print STDERR $var->trivname, "\n";
print STDERR $var->allele_ori->seq, "\n";
print STDERR $var->allele_mut->seq, "\n";
foreach my $all ($var->each_Allele) {
print STDERR $all->seq, "\n";
}
}
}
foreach my $gen (@genes) {
if ($gen->id eq $geneid) {
$gene = $gen;
}
}
#Get all of the exons out of the virtual transcript
#my @exons = $transcript->each_Exon;
#This loop checks which exons are localised on the transcript
# foreach my $sn (@snips) {
# my $sn_pos = $sn->start;
# foreach my $ex(@exons) {
# if (($sn_pos >= $ex->start) && ($sn_pos <= $ex->end)) {
# my $uni;
# $uni->{snp} = $sn;
#This store the position of the exon where the snp is located
# $uni->{pos} = $count;
# push (@ex_snps, $uni);
# }
# $count++;
# }
# $count = 0;
# }
#Loop over all of the snps which are located on exons
# foreach my $rt (@ex_snps) {
#to check
# my @array = (0..$rt->{pos});
# my $previous_exons_length = 0;
#
# foreach my $po (@array) {
# $previous_exons_length =+ $exons[$po]->length;
# }
#Get the location of the snp in aa coordinates
# my $aa_pos = int (($rt->{snp}->start - @exons[$rt->{pos}]->start + $previous_exons_length)/3) + 1;
#Create a Protein_FeaturePair object
# my $feat1 = new Bio::EnsEMBL::SeqFeature ( -seqname => $protid,
# -start => $aa_pos,
# -end => $aa_pos,
# -score => 0,
# -percent_id => "NULL",
# -p_value => "NULL");
#
# my $feat2 = new Bio::EnsEMBL::SeqFeature (-start => 0,
# -end => 0,
# -seqname => "SNP");
#
# my $feature = new Bio::EnsEMBL::Protein_FeaturePair(-feature1 => $feat1,
# -feature2 => $feat2,);
#
# push(@array_features,$feature);
#
# }
return @array_features;
my @transcripts = $gene->each_Transcript();
foreach my $trans (@transcripts) {
if ($trans->id eq $transid) {
$transcript = $trans;
}
}
my $genesnp = new Bio::EnsEMBL::ExternalData::GeneSNP
(-gene => $gene,
-contig => $vc
);
$genesnp->transcript($transcript);
my @seq_diff = $genesnp->snps2transcript(@snips);
foreach my $diff (@seq_diff) {
foreach my $var ($diff->each_Variant) {
if($var->isa('Bio::Variation::AAChange') ) {
#print STDERR $var->label, "\n";
#print STDERR $var->trivname, "\n";
#print STDERR $var->allele_ori->seq, "\n";
#print STDERR $var->allele_mut->seq, "\n";
#foreach my $all ($var->each_Allele) {
# print STDERR $all->seq, "\n";
#}
}
}
}
return @array_features;
}
......
......@@ -55,6 +55,7 @@ use Bio::Annotation::DBLink;
use Bio::EnsEMBL::Transcript;
use Bio::DBLinkContainerI;
use Bio::SeqIO;
use Bio::Tools::SeqStats;
# Object preamble - inheriets from Bio::Root::Object
......@@ -459,6 +460,83 @@ sub end{
}
=head2 molecular_weight
Title : molecular_weight
Usage :
Function: This method has been placed here for convenience function. This gets and return the molecular weight of the protein
Example : my $mw = $protein->molecular_weight
Returns : Moleculat weight of the peptide
Args :
=cut
sub molecular_weight{
my ($self) = @_;
my $mw = ${Bio::Tools::SeqStats->get_mol_wt($self)}[0];
return $mw
}
=head2 checksum
Title : checksum
Usage : my $checksum = $protein->checksum()
Function: Get the crc64 for the sequence of the protein (this method has been copied from _crc64 method which is in swiss.pm in Bioperl and have been placed here for convenience function
Example :
Returns : CRC64
Args : Nothing
=cut
sub checksum{
my ($self) = @_;
my $sequence = \$self->seq();
my $POLY64REVh = 0xd8000000;
my @CRCTableh = 256;
my @CRCTablel = 256;
my $initialized;
my $seq = $$sequence;
my $crcl = 0;
my $crch = 0;
if (!$initialized) {
$initialized = 1;
for (my $i=0; $i<256; $i++) {
my $partl = $i;
my $parth = 0;
for (my $j=0; $j<8; $j++) {
my $rflag = $partl & 1;
$partl >>= 1;
$partl |= (1 << 31) if $parth & 1;
$parth >>= 1;
$parth ^= $POLY64REVh if $rflag;
}
$CRCTableh[$i] = $parth;
$CRCTablel[$i] = $partl;
}
}
foreach (split '', $seq) {
my $shr = ($crch & 0xFF) << 24;
my $temp1h = $crch >> 8;
my $temp1l = ($crcl >> 8) | $shr;
my $tableindex = ($crcl ^ (unpack "C", $_)) & 0xFF;
$crch = $temp1h ^ $CRCTableh[$tableindex];
$crcl = $temp1l ^ $CRCTablel[$tableindex];
}
my $crc64 = sprintf("%08X%08X", $crch, $crcl);
return $crc64;
}
=head2 adaptor
Title : adaptor
......
......@@ -138,3 +138,45 @@ sub idesc{
return $obj->{'idesc'};
}
=head2 intron_length
Title : intron_length
Usage : my $length = $intronfeature->intron_length
Function: Return the length of an intron by calling its starting point and end point in global coordinates
Example :
Returns : Length of a given intron
Args : Nothing
=cut
sub intron_length{
my ($self) = @_;
my $start = $self->feature2->start;
my $end = $self->feature2->end;
my $length = int ((($end - $start)/3)+0.5);
return $length;
}
=head2 intron_position
Title : intron_position
Usage :
Function: Return the position of the intron on the amino acid sequence
Example :
Returns :
Args :
=cut
sub intron_position{
my ($self,@args) = @_;
my $pos = $self->feature1->start;
return $pos;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment