Commit 4d4cab63 authored by cvs2git
Browse files

This commit was manufactured by cvs2svn to create tag 'tag_before_07_merge'.

Sprout from master 2000-12-18 16:54:24 UTC Philip Lijnzaad <lijnzaad@sanger.ac.uk> 'added schema diagram in pdf format; see also README'
Cherrypick from master 2000-09-17 15:15:32 UTC Ewan Birney <birney@sanger.ac.uk> 'added makeunique...':
    misc-scripts/golden_path/makeunique.pl
    misc-scripts/golden_path/mergefpcchr.pl
    modules/t/EnsTestDB.pm
    modules/t/mapcontig.dump
    modules/t/mapcontig.t
Cherrypick from master 2001-01-03 16:07:21 UTC Michele Clamp <michele@sanger.ac.uk> 'Large tables for dna':
    scripts/test_genes
    sql/table.sql
parent 9031f170
......@@ -10,6 +10,6 @@ while(<>) {
print ERROR "$_\n";
next;
}
print "$_\n";
$h{$rid} = 1;
}
......@@ -34,14 +34,10 @@ shift(@chromo);
shift(@chromo);
foreach $chromo ( @chromo ) {
#$chromo =~ /.*?(\d+).*\.agp/ || next;
$chromo =~ /chr([^_\.]+)[_|\.].*agp/ || next;
$chromo =~ /.*?(\d+).*\.agp/ || next;
$chrnumber = $1;
print STDERR "Doing $chromo\n";
#if ($chromo ne 'chr22.agp') {
# next;
#}
open(GF,"$chr/$chromo") || die "no $file $!\n";
open(OUT,">$chr/$chromo.fpc");
......@@ -58,17 +54,16 @@ foreach $chromo ( @chromo ) {
open(F,"$contigdir/$chrnumber/$contig/$contig.agp") || die "did not open $contig $!\n";
while(<F>) {
# chr20 1 1970 1 P AL360078.3 76699 78668 -
/\S+\/(\S+)\s+(\d+)\s+(\d+)\s+\d+\s+\S\s+(\S+)\s+(\d+)\s+(\d+)/ ||next;
/\S+\/(\S+)\s+(\d+)\s+(\d+)\s+\d+\s+\S\s+(\S+)\s+(\d+)\s+(\d+)/ || next;
$id = $1;
$fstart = $2;
$fend = $3;
$accf = $4;
$accst = $5;
$accend = $6;
$idstring = "$accf:$accst:$accend";
#print "Storing contig string $idstring\n";
$idstring = "$accf:$accst:$accend";
#print "Storing $idstring\n";
$fpc{$idstring} = $id;
$fpcstart{$idstring} = $fstart;
$fpcend{$idstring} = $fend;
......@@ -76,8 +71,6 @@ foreach $chromo ( @chromo ) {
}
while( <GF> ) {
#19/ctg113 1 37401 1 F AC011523.3 1 37401 +
#22/chr22 1 37693 1 F AP000522.1 1 37693 +
/(\S+)\s+(\d+)\s+(\d+)\s+\d+\s+\S\s+(\S+)\s+(\d+)\s+(\d+)/ || do { print OUT $_; next; };
$id = $1;
......@@ -87,7 +80,7 @@ foreach $chromo ( @chromo ) {
$accst = $5;
$accend = $6;
$idstring = "$accf:$accst:$accend";
#print "Reading chromosome line with $idstring\n";
if( !defined $fpc{$idstring} ) {
print STDERR "Could not find fpc contig for $idstring\n";
next;
......
......@@ -203,9 +203,9 @@ sub ensembl_locator {
my $module = ($self->module() || 'Bio::EnsEMBL::DBSQL::Obj');
my $locator = '';
foreach my $meth (qw{ host port dbname user password }) {
foreach my $meth (qw{ host port dbname user }) {
my $value = $self->$meth();
next unless defined $value;
if( !defined $value ) { next; }
$locator .= ';' if $locator;
$locator .= "$meth=$value";
}
......@@ -237,9 +237,7 @@ sub do_sql_file {
}
close SQL;
#Modified split statement: split only on semicolons that come right before
#the end of a line, so we can have them inside a string in the statement
foreach my $s (grep /\S/, split /;\n/, $sql) {
foreach my $s (grep /\S/, split /;/, $sql) {
$self->validate_sql($s);
$dbh->do($s);
$i++
......
......@@ -4,9 +4,6 @@ VALUES (1,'pog', 'pog', 1, 1, 3, '0000-00-00 00:00:00', '0000-00-00 00:00:00', '
INSERT INTO contig (internal_id, id, clone, length, dna, offset, corder, chromosomeId, international_id)
VALUES (1, 'contig1', 1, 50, 1, 0, NULL, 'chromosome1', 'international-a');
-- INSERT INTO dna (id, sequence, created)
-- VALUES (1, 'AAAAAAAAAATTTTTTTTTAAAAAAAAATTTTTTTTTAAAAAAAAAATTTTTTTTTT', '2000-04-27 08:31:59');
--
INSERT INTO dna (id, sequence, created)
VALUES (1, 'AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT', '2000-04-27 08:31:59');
--
VALUES (1, 'AAAAAAAAAATTTTTTTTTAAAAAAAAATTTTTTTTTAAAAAAAAAATTTTTTTTTT', '2000-04-27 08:31:59');
......@@ -21,7 +21,7 @@
## We start with some black magic to print on failure.
BEGIN { $| = 1; print "1..9\n";
BEGIN { $| = 1; print "1..7\n";
use vars qw($loaded); }
END {print "not ok 1\n" unless $loaded;}
......@@ -91,65 +91,12 @@ if( $mc->start != 400 ||
print "ok 6\n";
}
# raw contig seq is:
# AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT
$str = $mc->seq;
$shouldbe='AAAAAAAACCCCCCCCCCGGG';
# if( $str ne 'AAAAAAAATTTTTTTTTAAAA' ) {
if( $str ne $shouldbe ) {
$str = $mc->_actual_sequence_as_string;
if( $str ne 'AAAAAAAATTTTTTTTTAAAA' ) {
print "not ok 7\n";
print STDERR "Seq $str, should be $shouldbe\n";
print STDERR "Seq $str\n";
} else {
print "ok 7\n";
}
# reverse strand:
$mc = Bio::EnsEMBL::Virtual::MapContig->new(
-rawcontig => $contig,
-start => 1001,
-end => 1010,
-rawcontig_start => 8,
-orientation => -1,
);
# raw contig seq is:
# AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT
$str=$mc->seq;
$shouldbe='GGGGGGGTTT';
if ($str eq $shouldbe ) {
print "ok 8\n";
} else {
print "not ok 8\n";
warn "Seq $str, should be $shouldbe\n";
}
# check missing args:
eval {
$mc = Bio::EnsEMBL::Virtual::MapContig->new(
-rawcontig => $contig,
-start => 230,
-rawcontig_start => 3,
-orientation => 1
);
};
if ($@) {
print "ok 8\n";
} else {
print "not ok 8\n";
warn "expected exception on missing arguments";
}
eval {
$mc = Bio::EnsEMBL::Virtual::MapContig->new(
-rawcontig => $contig,
-start => 30,
-end => 10,
-rawcontig_start => 3,
-orientation => 1
);
};
if ( $@ ) {
print "ok 9\n";
} else {
print "not ok 9\n";
warn "expected exception on start > end ";
}
......@@ -25,33 +25,56 @@ $| = 1;
'infile=s' => \$infile,
);
my ($db1) = get_db_handles();
my $db = new Bio::EnsEMBL::DBSQL::Obj(-host => $host1,
-user => 'ensro',
-dbname => $dbname1);
my @ids = $db->get_all_Clone_id;
my ($clone1,$acc) = get_cloneids($infile,@ARGV);
my $i = 0;
for ($i = 0; $i <= $#$clone1; $i++) {
print("Processing clone " . $clone1->[$i] . "\t" . $acc->[$i] . "\n");
open OUT,">chr22.cdna";
foreach my $cloneid (@ids) {
print("Processing clone " . $cloneid . "\n");
eval {
my $cl1 = $db1->get_Clone($clone1->[$i]);
my $cl1 = $db->get_Clone($cloneid);
my @genes1 = $cl1->get_all_Genes();
my $count = 1;
foreach my $gene1 (@genes1) {
print "Found gene " . $gene1->id . "\n";
my @cdna;
foreach my $tran ($gene1->each_Transcript) {
my $pep;
foreach my $exon ($tran->each_Exon) {
$pep .= $exon->seq->seq;
}
push(@cdna,$pep);
}
print("$acc->[$i]\t$clone1->[$i]\t" . $gene1->id . "\n");
foreach my $exon ($gene1->each_unique_Exon) {
print("Exon " . $exon->id . "\t" .$exon->start . "\t" . $exon->end . "\t" . $exon->strand . "\n");
my $maxlen;
my $finalpep;
if ($#cdna >= 0) {
foreach my $p (@cdna) {
if (length($p) > $maxlen) {
$finalpep = $p;
$maxlen = length($p);
}
print OUT ">" . $gene1->id . "\n";
$p =~ s/(.{72})/$1\n/g;
print OUT $p . "\n";
}
}
}
};
if ($@) {
print("Error processing clone: $@\n");
}
}
close(OUT);
sub get_cloneids {
my ($infile,@ARGV) = @_;
......@@ -77,26 +100,3 @@ sub get_cloneids {
# Create and return the database handle selected by $dbtype1.
#
# Reads $dbtype1, $host1, $port1, $dbuser1 and $dbpass1, all of which are
# defined outside this sub.
#
# Returns: a one-element list holding the new handle, either a
#          Bio::EnsEMBL::AceDB::Obj or whatever Bio::EnsEMBL::DBLoader->new
#          gives back for the DBSQL locator string.
# Dies:    when $dbtype1 matches neither 'ace' nor 'rdb'.
sub get_db_handles {
    # The type is checked with an unanchored pattern match, 'ace' first.
    if ( $dbtype1 =~ 'ace' ) {
        my $ace_handle = Bio::EnsEMBL::AceDB::Obj->new(
            -host => $host1,
            -port => $port1,
        );
        return ($ace_handle);
    }

    if ( $dbtype1 =~ 'rdb' ) {
        # Locator format: "<module>/key=value;key=value;...".
        # The database name is fixed to 'ensembl' here.
        my $rdb_locator = join '',
            "Bio::EnsEMBL::DBSQL::Obj/host=$host1;",
            "port=$port1;",
            "dbname=ensembl;",
            "user=$dbuser1;",
            "pass=$dbpass1";
        my $rdb_handle = Bio::EnsEMBL::DBLoader->new($rdb_locator);
        return ($rdb_handle);
    }

    die("$dbtype1 is not a good type (should be ace or rdb)");
}
......@@ -363,7 +363,9 @@ CREATE TABLE genetype (
gene_id varchar(40) NOT NULL,
type varchar(40) NOT NULL,
PRIMARY KEY(gene_id,type)
PRIMARY KEY(gene_id,type),
KEY(gene_id),
KEY(type)
);
# this is a denormalised golden path
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment