Commit 99eb5009 authored by Arne Stabenau's avatar Arne Stabenau
Browse files

some edits for more speedy gene retrieval

parent c042c8df
......@@ -105,13 +105,17 @@ sub transfer {
$tmpdir = $self->tmp_dir();
}
# First check that every standard transfer is possible; failing up front
# is much less painful than finding out halfway through a transfer run.
$self->check_possible_transfer();
for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
my $skip = 0;
print STDERR "Transfer $tablename ";
open( FH, ">$tmpdir/$tablename.txt" ) or die "cant open dumpfile";
if( exists $self->{tragetdb}{tables}{$tablename}{transfer} ) {
if( exists $self->{targetdb}{tables}{$tablename}{transfer} ) {
my $transfunc = $self->{targetdb}{tables}{$tablename}{transfer};
&$transfunc( $self->source_dbh(), $self->target_dbh(), $tablename, \*FH );
} else {
......@@ -158,6 +162,80 @@ sub transfer {
}
# Checks that every standard table transfer is possible before any data
# is moved.
#
# For each target table the source table is determined as follows:
#   - a 'transfer' entry (custom transfer function) is not checked
#   - a 'link' entry names the source table; an empty link skips the table
#   - a 'select' entry (custom select) is not checked
#   - otherwise the source table must carry the same name as the target
#
# Every target column must then either have an entry in the table's
# 'columnrename' hash (an empty value there means the column is filled
# with NULL) or exist under the same name in the source table.
#
# Arguments: none besides the object itself.
# Returns:   nothing; it only returns when all checked transfers can go.
# Dies:      "Couldnt find source for <table>. Enter empty sourcetable."
#            when no source table can be determined, and
#            "Couldnt fill <table>.<column>\n" when a target column has
#            no source.
sub check_possible_transfer {
    my $self = shift;

    TABLE:
    for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
        my $target = $self->{targetdb}{tables}{$tablename};

        # custom transfer functions are not checked
        next TABLE if exists $target->{transfer};

        my $sourcetable;
        if( exists $target->{link} ) {
            $sourcetable = $target->{link};
            # an empty link means: skip this table
            next TABLE if $sourcetable eq "";
        } elsif( exists $target->{select} ) {
            # custom select, no check
            next TABLE;
        } elsif( ! exists $self->{sourcedb}{tables}{$tablename} ) {
            die "Couldnt find source for $tablename. Enter empty sourcetable.";
        } else {
            $sourcetable = $tablename;
        }

        # Look the source table up with exists first. Dereferencing
        # ...{tables}{$sourcetable}{columns} directly would autovivify an
        # entry for a linked table that is not in the source schema, and
        # the check must not modify the schema description it inspects.
        # A missing table is treated as having no columns.
        my $source = exists $self->{sourcedb}{tables}{$sourcetable}
            ? $self->{sourcedb}{tables}{$sourcetable}
            : {};
        my %is_source_column = map { $_ => 1 } @{ $source->{columns} || [] };

        my $rename = $target->{columnrename} || {};

        for my $colname ( @{ $target->{columns} || [] } ) {
            # renamed columns (including those mapped to NULL) are taken
            # on trust; the rename value itself is not verified here
            next if exists $rename->{$colname};
            next if $is_source_column{$colname};
            die "Couldnt fill $tablename.$colname\n";
        }
    }

    return;
}
sub standard_table_transfer {
my ( $self, $sourcetable, $targettable, $tmpfile ) = @_;
......
......@@ -167,7 +167,7 @@ sub map_coordinates_to_rawcontig {
$self->throw("Expecting +/- 1 for chromosome strand, but got '$strand'");
}
$self->register_region($chr_name, $start, $end);
# $self->register_region($chr_name, $start, $end);
return $self->_mapper->map_coordinates($chr_name, $start, $end, $strand, 'assembly');
}
......
......@@ -937,7 +937,6 @@ sub prepare {
}
# should we try to verify the string?
return $self->_db_handle->prepare($string);
}
......
......@@ -124,6 +124,7 @@ sub fetch_by_geneId {
if( !defined $geneId ) {
$self->throw("Must has a geneId ... ");
}
$self->{rchash} = {};
my $query = qq {
SELECT e.exon_id
......@@ -240,15 +241,15 @@ sub _new_Exon_from_hashRef {
$exon->sticky_rank($hashRef->{'sticky_rank'});
$exon->adaptor($self);
if( !exists $self->{rchash}{$hashRef->{'contig_id'}} ) {
$self->{rchash}{$hashRef->{contig_id}} = $self->db->get_RawContigAdaptor->fetch_by_dbID($hashRef->{'contig_id'});
if ( !defined $self->{rchash}{$hashRef->{contig_id}} ) {
if( !exists $self->{rchash}->{$hashRef->{'contig_id'}} ) {
$self->{rchash}->{$hashRef->{'contig_id'}} = $self->db->get_RawContigAdaptor->fetch_by_dbID($hashRef->{'contig_id'});
if ( !defined $self->{rchash}->{$hashRef->{'contig_id'}} ) {
$self->throw("No contig for ".$hashRef->{'contig_id'});
}
}
$exon->attach_seq($self->{rchash}{$hashRef->{'contig_id'}}->primary_seq);
$exon->contig( $self->{rchash}{$hashRef->{'contig_id'}} );
$exon->attach_seq($self->{rchash}->{$hashRef->{'contig_id'}});
$exon->contig( $self->{rchash}->{$hashRef->{'contig_id'}} );
$exon->seqname($hashRef->{'cid'});
$exon->ori_start( $exon->start );
$exon->ori_end( $exon->end );
......
......@@ -117,7 +117,6 @@ sub fetch_all {
( $aref->[1], $self->db() ); # ?
my $contig = Bio::EnsEMBL::RawContig->new( $aref->[0], $self );
$contig->seq( $dbPrimarySeq );
push( @res, $contig );
}
return @res;
......@@ -149,7 +148,7 @@ sub fetch_by_clone {
my $clone_id = $clone->dbID;
my $sth = $self->prepare( "SELECT contig_id, name, clone_id, length,
offset, corder, dna_id, chromosome_id,
offset, corder, dna_id,
international_name
FROM contig
WHERE clone_id = $clone_id" );
......@@ -220,13 +219,6 @@ sub _fill_contig_from_arrayref {
$international_name ) = @$aref;
my $dbPrimarySeq = Bio::EnsEMBL::DBSQL::DBPrimarySeq->new
( $dna_id, $self->db() ); # ?
# my $contig = Bio::EnsEMBL::RawContig->new($contig_id,$self);
(defined $dbPrimarySeq) && $contig->sequence( $dbPrimarySeq );
(defined $length) && $contig->length( $length );
(defined $name) && $contig->name( $name );
(defined $offset) && $contig->offset( $offset );
......
......@@ -72,6 +72,7 @@ sub fetch_by_contig_id_start_end_strand {
}
if( $end == -1 ) {
$sth = $self->prepare( "SELECT c.length, SUBSTRING( d.sequence, $start )
FROM dna d, contig c
WHERE d.dna_id = c.dna_id
......@@ -194,7 +195,7 @@ sub fetch_by_assembly_location {
$strand, $chrName, $assemblyType ) = @_;
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($assemblyType);
$mapper->register_region($chrName,$chrStart,$chrEnd);
# $mapper->register_region($chrName,$chrStart,$chrEnd);
my @coord_list = $mapper->map_coordinates_to_rawcontig
( $chrName, $chrStart, $chrEnd, $strand );
......
......@@ -281,6 +281,8 @@ sub get_all_Genes{
my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor();
my @genes = $gene_adaptor->fetch_by_Slice($self);
return @genes;
}
......
......@@ -667,4 +667,114 @@ CREATE TABLE prediction_transcript (
# Auto add schema version to database
insert into meta (meta_key, meta_value) values ("schema_version", "$Revision$");
# This table stores the full tiling path for whole genome
# shotgun data. This is used when the contigs in
# the contig table are partially denormalized (typically to
# 5Mb) enabling the code and the web site to be as
# fast as possible without losing any of the assembly
# information
# assembly_contig: every column is NOT NULL; rows are identified by the
# auto-increment assembly_contig_id.
# NOTE(review): start/end look like coordinates on the contig and
# chr_start/chr_end like coordinates on the chromosome - inferred from the
# column names only, confirm against the loading code.
CREATE TABLE assembly_contig (
assembly_contig_id INT unsigned not null auto_increment,
assembly_contig_name varchar(20) NOT NULL,
assembly_contig_chr_name varchar(20) NOT NULL,
assembly_contig_start int(10) NOT NULL,
assembly_contig_end int(10) NOT NULL,
assembly_contig_chr_start int(10) NOT NULL,
assembly_contig_chr_end int(10) NOT NULL,
assembly_contig_orientation tinyint(2) NOT NULL,
assembly_contig_type varchar(20) NOT NULL,
PRIMARY KEY (assembly_contig_id),
# secondary indexes: lookup by (name, type) and by
# (chromosome name, chromosome start, type)
KEY(assembly_contig_name,assembly_contig_type),
KEY(assembly_contig_chr_name,assembly_contig_chr_start,assembly_contig_type)
);
# MySQL dump 8.12
#
# Host: localhost Database: mus_musculus_core_5_2
#--------------------------------------------------------
# Server version 3.23.32-log
#
# Table structure for table 'mapfrag'
#
# mapfrag: one named fragment per row; 'name' is unique across the table
# and 'type' is restricted to the enum values below (default 'clone').
# NOTE(review): dnafrag_id presumably refers to dnafrag.dnafrag_id; no
# foreign key is declared here - confirm.
CREATE TABLE mapfrag (
mapfrag_id int(10) unsigned NOT NULL auto_increment,
type enum('clone','superctg','assembly_contig','band','chr') NOT NULL default 'clone',
dnafrag_id int(10) unsigned NOT NULL default '0',
seq_start int(10) unsigned NOT NULL default '0',
seq_end int(10) unsigned NOT NULL default '0',
orientation tinyint(4) NOT NULL default '0',
name varchar(40) NOT NULL default '',
PRIMARY KEY (mapfrag_id),
UNIQUE KEY name(name),
# index m: lookup by dnafrag_id, then seq_start
KEY m(dnafrag_id,seq_start)
) TYPE=MyISAM;
#
# Table structure for table 'dnafrag'
#
# dnafrag: uniquely named entries keyed by the auto-increment dnafrag_id.
# dnafrag_type is the only nullable column (default NULL) and is limited
# to 'RawContig' or 'Chromosome'.
CREATE TABLE dnafrag (
dnafrag_id int(10) unsigned NOT NULL auto_increment,
name varchar(40) NOT NULL default '',
dnafrag_type enum('RawContig','Chromosome') default NULL,
PRIMARY KEY (dnafrag_id),
UNIQUE KEY name(name)
) TYPE=MyISAM;
#
# Table structure for table 'mapannotation'
#
# mapannotation: a value stored per (mapfrag_id, mapannotationtype_id).
# NOTE(review): mapfrag_id and mapannotationtype_id presumably refer to
# the mapfrag and mapannotationtype tables; no foreign keys are declared
# here - confirm.
CREATE TABLE mapannotation (
mapannotation_id int(10) unsigned NOT NULL auto_increment,
mapfrag_id int(10) unsigned NOT NULL default '0',
mapannotationtype_id smallint(5) unsigned NOT NULL default '0',
value varchar(240) NOT NULL default '',
PRIMARY KEY (mapannotation_id),
# the four secondary indexes cover both column orders of
# (mapfrag_id, mapannotationtype_id) and (value, mapannotationtype_id)
KEY mapfrag_id(mapfrag_id,mapannotationtype_id),
KEY mapannotationtype_id(mapannotationtype_id,mapfrag_id),
KEY value(value,mapannotationtype_id),
KEY mapannotationtype_id_2(mapannotationtype_id,value)
) TYPE=MyISAM;
#
# Table structure for table 'mapannotationtype'
#
# mapannotationtype: lookup table with a unique short 'code' plus a
# longer name and free-text description; keyed by the auto-increment
# mapannotationtype_id.
CREATE TABLE mapannotationtype (
mapannotationtype_id smallint(5) unsigned NOT NULL auto_increment,
code varchar(15) NOT NULL default '',
name varchar(255) NOT NULL default '',
description text NOT NULL,
PRIMARY KEY (mapannotationtype_id),
UNIQUE KEY c(code)
) TYPE=MyISAM;
#
# Table structure for table 'mapset'
#
# mapset: lookup table with a unique short 'code' plus a longer name and
# free-text description; keyed by the auto-increment mapset_id.
CREATE TABLE mapset (
mapset_id smallint(5) unsigned NOT NULL auto_increment,
code varchar(15) NOT NULL default '',
name varchar(255) NOT NULL default '',
description text NOT NULL,
PRIMARY KEY (mapset_id),
UNIQUE KEY c(code)
) TYPE=MyISAM;
#
# Table structure for table 'mapfrag_mapset'
#
# mapfrag_mapset: pairs of (mapset_id, mapfrag_id); the composite primary
# key allows each pair only once and is ordered mapset_id first.
# NOTE(review): presumably a many-to-many link between mapfrag and mapset;
# no foreign keys are declared here - confirm.
CREATE TABLE mapfrag_mapset (
mapfrag_id int(10) unsigned NOT NULL default '0',
mapset_id smallint(5) unsigned NOT NULL default '0',
PRIMARY KEY (mapset_id,mapfrag_id)
) TYPE=MyISAM;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment