From a156e3478815c2af101a64039d3e2468c85af82d Mon Sep 17 00:00:00 2001 From: Graham McVicker <mcvicker@sanger.ac.uk> Date: Fri, 20 Jun 2003 11:01:56 +0000 Subject: [PATCH] removed old systemtests - made obsolete by healthchecks --- systemtests/orphans_and_dangles.pl | 98 ----------- systemtests/stops_and_lengths_by_chr.pl | 205 ------------------------ systemtests/stops_and_lengths_by_fpc.pl | 200 ----------------------- 3 files changed, 503 deletions(-) delete mode 100644 systemtests/orphans_and_dangles.pl delete mode 100755 systemtests/stops_and_lengths_by_chr.pl delete mode 100755 systemtests/stops_and_lengths_by_fpc.pl diff --git a/systemtests/orphans_and_dangles.pl b/systemtests/orphans_and_dangles.pl deleted file mode 100644 index 8b880da470..0000000000 --- a/systemtests/orphans_and_dangles.pl +++ /dev/null @@ -1,98 +0,0 @@ -# this script check some of the foreign key relationships in ensembl style -# database. No commandline args, you have to edit $db= line - -use DBI; -use strict; -use IO::File; - -my $db = DBI->connect( "dbi:mysql:host=ecs2d;database=embl_6_29_new", "ensro","" ); - -# check if contigs are the same in embl and core -my $count = $db->selectrow_array - ( q{ - select count(*) - from contig c, homo_sapiens_core_6_29.contig c2 - where c.id = c2.id - and c.offset != c2.offset - and c.length != c2.length - } - ); - -print STDERR "$count differing contigs\n"; - -# check if new contigs in embl not in core -$count = $db->selectrow_array - ( q{ - select count(*) - from contig c - left join homo_sapiens_core_6_29.contig c2 - on c.id = c2.id - where c2.id is null - } - ); -print STDERR "new contigs in embl $count\n"; - - -orphan( $db, "exon", "exon_id", "exon_transcript", "exon_id" ); -orphan( $db, "exon_transcript", "exon_id", "exon", "exon_id" ); -orphan( $db, "exon", "exon_id", "exon_stable_id", "exon_id" ); -orphan( $db, "exon_stable_id", "exon_id", "exon", "exon_id" ); - - -orphan( $db, "transcript", "transcript_id", "exon_transcript", "transcript_id" ); -orphan( $db, "exon_transcript", "transcript_id", "transcript", "transcript_id" ); -orphan( $db, "transcript", "transcript_id", "transcript_stable_id", "transcript_id" ); -orphan( $db, "transcript_stable_id", "transcript_id", "transcript", "transcript_id" ); - -orphan( $db, "translation", "translation_id", "transcript", "translation_id" ); -orphan( $db, "transcript", "translation_id", "translation", "translation_id" ); - -orphan( $db, "translation", "translation_id", "translation_stable_id", "translation_id" ); -orphan( $db, "translation_stable_id", "translation_id", "translation", "translation_id" ); - - -orphan( $db, "gene", "gene_id", "gene_stable_id", "gene_id" ); -orphan( $db, "gene_stable_id", "gene_id", "gene", "gene_id" ); - -orphan( $db, "gene", "gene_id", "transcript", "gene_id" ); -orphan( $db, "transcript", "gene_id", "gene", "gene_id" ); - -orphan( $db, "objectXref", "xrefId", "Xref", "xrefId" ); - -# very slow, now xref index on obejctXref -# orphan( $db, "Xref", "xrefId", "objectXref", "xrefId" ); - - -exit; - -#$count = $db->selectrow_array -# ( q{ -# -# } -# ); -#print STDERR "$count\n"; - - -sub orphan { - my ( $db, $table1, $col1, $table2, $col2 ) = @_; - print STDERR "Checking $table1 against $table2\n"; - - my $count = $db->selectrow_array - ( qq{ - select count(*) - from $table1 - left join $table2 - on $table1.$col1 = $table2.$col2 - where $table2.$col2 is null - } - ); - - if( $count > 0 ) { - print STDERR "$count orphans on $table1, $table2\n"; - } else { - print STDERR "Ok\n"; - } - -} - - diff --git a/systemtests/stops_and_lengths_by_chr.pl b/systemtests/stops_and_lengths_by_chr.pl deleted file mode 100755 index dfd6e81036..0000000000 --- a/systemtests/stops_and_lengths_by_chr.pl +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/local/bin/perl -w - -=head1 NAME - -Another data integrity test - -=head1 SYNOPSIS - -This script has 2 purposes: -- check that the length of each protein is not shorter than cut-off -given by user (default 2). -- check that protein sequence does not contain stops ('*'). - -How it all happens: -- get the list of "current" chromosomes (at the present we still have -things like chrUL_random) unless a list is given on the commandline -- build virtual contigs for each chromosome -- get genes on each virtual contig -- get transcripts for each gene -- get translation of each transcript -- check the length of the transcript and presence of '*' characters. - -Output is divided to STDERR and STDOUT - -STDERR gets messages like: -Now checking genes on chromosome chr1 - -... - -159 genes with 172 transcripts - -... - -Checked 41 chromosomes with 335 genes with 351 transcripts - -Information about proteins, transcripts, genes shorter or equal to -cut-off or containing stops id printed to STDOUT: - -ENSP00000211067 encoded by transcript ENST00000211067 from gene ENSG00000095908 on chromosome chrNA_random has length: 4 - -... - -ENSP00000215395 encoded by transcript ENST00000215395 from gene ENSG00000099644 on chromosome chrUL_random contains '*' character(s). -TPSQSEDLRACFEQNKFQGIATRDGLALAIGFLEPIVQNWFQNERSRQVRQHCRESRPRPGRHGPQEGR*KRTAVTGSQTALLLRAFEKDRFPGIAAREDLAR*TGLPGSRIQIRFQNRRARHLGEAGRAPAKAGSRYNAAP - - -=head1 OPTIONS - --host db server name (default localhost) - --port port to connect to (default 3306) - --dbname name of the database to use (default ensembl_test) - --driver database driver (defauly mysql) - --user username for database access (default ensro) - --pass password for database access (default undef) - --minlength minimal length of the protein to be considered OK - --h|help print out help (this text) - - -=head1 WARNING - -This script takes quite a long time to run... - -=head1 WARNING - -On big chromosomes (like chr1) one is likely to run out of memory -(that is what happend at least on ecs1c) - -=cut - -use strict; -use Bio::EnsEMBL::DBSQL::DBAdaptor; -use Getopt::Long; - -my $minlength = 2; -my $host = 'localhost'; -my $port = 3306; -my $dbname = 'ensembl100'; -my $pass = undef; -my $user = 'ensro'; -my $driver = 'mysql'; -my $help; - -&GetOptions -( - 'host:s' => \$host, - 'port:n' => \$port, - 'dbname:s' => \$dbname, - 'user:s' => \$user, - 'pass:s' => \$pass, - 'driver:s' => \$driver, - 'minlength:n' => \$minlength, - 'h|help' => \$help, -); - -$help && exec('perldoc', $0); - -# Get db adaptor -my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new -( - -user => $user, - -dbname => $dbname, - -host => $host, - -driver => $driver, - -port => $port, - -pass => $pass, -); - - -# Get list of all current cromosomes -my @chromosomes; -if(@ARGV) -{ - @chromosomes = @ARGV; -} -else -{ - my $sth = $db->prepare - ( - "select distinct(chr_name) from static_golden_path" - ); - my $rv = $sth->execute(); - my $chr; - $sth->bind_columns(undef, \$chr); - while($sth->fetch) - { - push @chromosomes, $chr; - } - unless(@chromosomes) { - die "Could not get chromosome names from static_golden_path table.\n"; - } -} - - -# Get static golden path adaptor -my $stadp = $db->get_StaticGoldenPathAdaptor(); - - -# Iterate over chromosomes and pull out all genes -my($genecount, $transcriptcount); -foreach my $chromosome (@chromosomes) -{ - my($chr_genecount, $chr_transcriptcount); - print STDERR "Building virtual contig for chromosome $chromosome\n"; - my $vc = $stadp->fetch_VirtualContig_by_chr_name($chromosome); - print STDERR "Checking genes on chromosome $chromosome\n"; - foreach my $gene ($vc->get_all_Genes) - { -# print $gene->id, "\n"; deleteObj($gene); next; - $genecount++; $chr_genecount++; - foreach my $transcript ($gene->each_Transcript) - { - $transcriptcount++; $chr_transcriptcount++; - my $pep = $transcript->translate; - my $length = length($pep->seq); - if($length < $minlength) - { - print $pep->id, - " encoded by transcript ", - $transcript->id, - " from gene ", - $gene->id, - " on chromosome ", - $chromosome, - " has length:\t", - $length, - "\n"; - } - if($pep->seq =~ /\*/) - { - print $pep->id, - " encoded by transcript ", - $transcript->id, - " from gene ", - $gene->id, - " on chromosome ", - $chromosome, - " contains '*' character(s).\n", - $pep->seq, - "\n"; - } - } # foreach my $transcript ($gene->each_Transcript) - deleteObj($gene); - } # foreach my $gene (@genes) - print STDERR "$chr_genecount genes with $chr_transcriptcount transcripts\n"; -} # foreach my $chromosome (@chromosomes) - -print STDERR "Checked ", scalar(@chromosomes), " chromosomes with $genecount genes with $transcriptcount transcripts\n"; - -sub deleteObj { - my $self = shift; - my @dummy = values %{$self}; - foreach my $key ( keys %$self ) { - delete $self->{$key}; - } - foreach my $obj ( @dummy ) { - deleteObj($obj); - } -} diff --git a/systemtests/stops_and_lengths_by_fpc.pl b/systemtests/stops_and_lengths_by_fpc.pl deleted file mode 100755 index 90908c5b1c..0000000000 --- a/systemtests/stops_and_lengths_by_fpc.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/local/bin/perl -w - -=head1 NAME - -Another data integrity test - -=head1 SYNOPSIS - -This script has 2 purposes: -- check that the length of each protein is not shorter than cut-off -given by user (default 2). -- check that protein sequence does not contain stops ('*'). - -How it all happens: -- get the list of fpcs -- build virtual contigs for each fpc -- get genes on each virtual contig -- get transcripts for each gene -- get translation of each transcript -- check the length of the transcript and presence of '*' characters. - -Output is divided to STDERR and STDOUT - -STDERR gets messages like: -Now checking genes on fpc ctg12323 -22 genes with 30 transcripts - - -Information about proteins, transcripts, genes shorter than -cut-off or containing stops id printed to STDOUT: - -ENSP00000228176 encoded by transcript ENST00000228176 from gene ENSG00000110736 on fpc ctg12269 contains '*' character(s). -VKRAYLVHSAYDQSYNFIYKSFRIASII*X - -... - -ENSP00000227580 encoded by transcript ENST00000227580 from gene ENSG00000110163 on fpc ctg12475 has length: 1 - - - -=head1 OPTIONS - --host db server name (default localhost) - --port port to connect to (default 3306) - --dbname name of the database to use (default ensembl_test) - --driver database driver (defauly mysql) - --user username for database access (default ensro) - --pass password for database access (default undef) - --minlength minimal length of the protein to be considered OK - --h|help print out help (this text) - - -=head1 WARNING - -This script takes quite a long time to run... - -=cut - -use strict; -use Bio::EnsEMBL::DBSQL::DBAdaptor; -use Getopt::Long; - -my $minlength = 2; -my $host = 'ecs1b.sanger.ac.uk'; -my $port = 3306; -my $dbname = 'ensembl100'; -my $pass = undef; -my $user = 'ensro'; -my $driver = 'mysql'; -my $help; - -&GetOptions -( - 'host:s' => \$host, - 'port:n' => \$port, - 'dbname:s' => \$dbname, - 'user:s' => \$user, - 'pass:s' => \$pass, - 'driver:s' => \$driver, - 'minlength:n' => \$minlength, - 'h|help' => \$help, -); - -$help && exec('perldoc', $0); - -# Get db adaptor -my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new -( - -user => $user, - -dbname => $dbname, - -host => $host, - -driver => $driver, - -port => $port, - -pass => $pass, -); - - -# Get list of all current cromosomes -my @fpcs; -if(@ARGV) -{ - @fpcs = @ARGV; -} -else -{ - my $sth = $db->prepare - ( - "select distinct(fpcctg_name) from static_golden_path" - ); - my $rv = $sth->execute(); - my $fpc; - $sth->bind_columns(undef, \$fpc); - while($sth->fetch) - { - push @fpcs, $fpc; - } -} - - -# Get static golden path adaptor -my $stadp = $db->get_StaticGoldenPathAdaptor(); - - -# Iterate over fpcs and pull out all genes -my($genecount, $transcriptcount, $counter) = (0, 0, 0); -foreach my $fpc (@fpcs) -{ - my($fpc_genecount, $fpc_transcriptcount) = (0, 0); - print STDERR "Building virtual contig for fpc $fpc\n"; - my $vc = $stadp->fetch_VirtualContig_by_fpc_name($fpc); - print STDERR "Now checking genes on fpc $fpc\n"; - foreach my $gene ($vc->get_all_Genes) - { - $genecount++; $fpc_genecount++; - foreach my $transcript ($gene->each_Transcript) - { - $transcriptcount++; $fpc_transcriptcount++; - my $pep = $transcript->translate; - my $length = length($pep->seq); - if($length < $minlength) - { - print $pep->id, - " encoded by transcript ", - $transcript->id, - " from gene ", - $gene->id, - " on fpc ", - $fpc, - " has length:\t", - $length, - "\n"; - } - if($pep->seq =~ /\*/) - { - print $pep->id, - " encoded by transcript ", - $transcript->id, - " from gene ", - $gene->id, - " on fpc ", - $fpc, - " contains '*' character(s).\n", - $pep->seq, - "\n"; - } - deleteObj($pep); - deleteObj($transcript); - } # foreach my $transcript ($gene->each_Transcript) - deleteObj($gene); - } # foreach my $gene (@genes) - print STDERR "$fpc_genecount genes with $fpc_transcriptcount transcripts\n"; - deleteObj($fpc); -} # foreach my $fpc (@fpcs) - -print STDERR "Checked ", scalar(@fpcs), " fpcs with $genecount genes with $transcriptcount transcripts\n"; - - -sub deleteObj { - my $self = shift; - eval{ - my @dummy = values %{$self}; - foreach my $key ( keys %$self ) { - delete $self->{$key}; - } - foreach my $obj ( @dummy ) { - deleteObj($obj); - } - }; -} - - - - -- GitLab