From a156e3478815c2af101a64039d3e2468c85af82d Mon Sep 17 00:00:00 2001
From: Graham McVicker <mcvicker@sanger.ac.uk>
Date: Fri, 20 Jun 2003 11:01:56 +0000
Subject: [PATCH] removed old systemtests - made obsolete by healthchecks

---
 systemtests/orphans_and_dangles.pl      |  98 -----------
 systemtests/stops_and_lengths_by_chr.pl | 205 ------------------------
 systemtests/stops_and_lengths_by_fpc.pl | 200 -----------------------
 3 files changed, 503 deletions(-)
 delete mode 100644 systemtests/orphans_and_dangles.pl
 delete mode 100755 systemtests/stops_and_lengths_by_chr.pl
 delete mode 100755 systemtests/stops_and_lengths_by_fpc.pl

diff --git a/systemtests/orphans_and_dangles.pl b/systemtests/orphans_and_dangles.pl
deleted file mode 100644
index 8b880da470..0000000000
--- a/systemtests/orphans_and_dangles.pl
+++ /dev/null
@@ -1,98 +0,0 @@
-# This script checks some of the foreign key relationships in an Ensembl-style
-# database. It takes no command-line args; edit the "my $db =" line instead.
-
-use DBI;
-use strict; 
-use IO::File;
-
-# Connect to the EMBL database and stop immediately if that fails, rather
-# than crashing later with a method call on an undefined handle.
-my $db = DBI->connect( "dbi:mysql:host=ecs2d;database=embl_6_29_new", "ensro","" )
-  or die "Could not connect to embl_6_29_new on ecs2d: $DBI::errstr\n";
-
-# Count contigs present in both databases whose offset or length differs.
-# The comparisons are OR-ed: a contig differs as soon as either one changes.
-my $count = $db->selectrow_array
-  ( q{
-     select count(*)
-       from contig c, homo_sapiens_core_6_29.contig c2
-      where c.id = c2.id
-        and (c.offset != c2.offset or c.length != c2.length)
-      } );
-print STDERR "$count differing contigs\n";
-
-# Count contigs that exist in embl but have no counterpart in core.
-$count = $db->selectrow_array
-  ( q{
-       select count(*)
-         from contig c
-        left join homo_sapiens_core_6_29.contig c2
-          on c.id = c2.id
-         where c2.id is null
-  } );
-print STDERR "new contigs in embl $count\n";
-
-
-# Foreign-key pairs to verify, in the order they are reported. Each row is
-# ( table1, col1, table2, col2 ): orphan() counts the rows of table1 whose
-# col1 value has no matching col2 value in table2.
-my @fk_checks = (
-  [ "exon",                  "exon_id",        "exon_transcript",       "exon_id" ],
-  [ "exon_transcript",       "exon_id",        "exon",                  "exon_id" ],
-  [ "exon",                  "exon_id",        "exon_stable_id",        "exon_id" ],
-  [ "exon_stable_id",        "exon_id",        "exon",                  "exon_id" ],
-  [ "transcript",            "transcript_id",  "exon_transcript",       "transcript_id" ],
-  [ "exon_transcript",       "transcript_id",  "transcript",            "transcript_id" ],
-  [ "transcript",            "transcript_id",  "transcript_stable_id",  "transcript_id" ],
-  [ "transcript_stable_id",  "transcript_id",  "transcript",            "transcript_id" ],
-  [ "translation",           "translation_id", "transcript",            "translation_id" ],
-  [ "transcript",            "translation_id", "translation",           "translation_id" ],
-  [ "translation",           "translation_id", "translation_stable_id", "translation_id" ],
-  [ "translation_stable_id", "translation_id", "translation",           "translation_id" ],
-  [ "gene",                  "gene_id",        "gene_stable_id",        "gene_id" ],
-  [ "gene_stable_id",        "gene_id",        "gene",                  "gene_id" ],
-  [ "gene",                  "gene_id",        "transcript",            "gene_id" ],
-  [ "transcript",            "gene_id",        "gene",                  "gene_id" ],
-  [ "objectXref",            "xrefId",         "Xref",                  "xrefId" ],
-  # Disabled: very slow (presumably no xrefId index on objectXref - confirm).
-  # [ "Xref", "xrefId", "objectXref", "xrefId" ],
-);
-
-foreach my $check (@fk_checks) {
-  orphan( $db, @$check );
-}
-
-# Stop here: only a commented-out query template and sub definitions follow.
-exit;
-
-#$count = $db->selectrow_array
-#  ( q{
-#  
-#  } 
-#  );
-#print STDERR "$count\n";
-
-
-# Report rows of $table1 that have no partner row in $table2.
-#   $db              handle providing selectrow_array()
-#   $table1, $col1   table being checked and its join column
-#   $table2, $col2   table expected to hold the matching value
-# Writes a progress line and then either the orphan count or "Ok" to STDERR.
-sub orphan {
-  my ( $db, $table1, $col1, $table2, $col2 ) = @_;
-  print STDERR "Checking $table1 against $table2\n";
-
-  # Left join plus "is null" keeps only the $table1 rows without a match.
-  my $sql = qq{
-       select count(*)
-         from $table1
-        left join $table2
-        on $table1.$col1 = $table2.$col2
-        where $table2.$col2 is null
-      };
-  my $orphans = $db->selectrow_array( $sql );
-
-  print STDERR ( $orphans > 0 ? "$orphans orphans on $table1, $table2\n" : "Ok\n" );
-}
-
-
diff --git a/systemtests/stops_and_lengths_by_chr.pl b/systemtests/stops_and_lengths_by_chr.pl
deleted file mode 100755
index dfd6e81036..0000000000
--- a/systemtests/stops_and_lengths_by_chr.pl
+++ /dev/null
@@ -1,205 +0,0 @@
-#!/usr/local/bin/perl -w
-
-=head1 NAME
-
-Another data integrity test
-
-=head1 SYNOPSIS
-
-This script has 2 purposes:
-- check that the length of each protein is not shorter than cut-off
-given by user (default 2).
-- check that protein sequence does not contain stops ('*').
-
-How it all happens:
-- get the list of "current" chromosomes (at the present we still have
-things like chrUL_random) unless a list is given on the commandline
-- build virtual contigs for each chromosome
-- get genes on each virtual contig
-- get transcripts for each gene
-- get translation of each transcript
-- check the length of the translation and the presence of '*' characters.
-
-Output is divided to STDERR and STDOUT
-
-STDERR gets messages like:
-Now checking genes on chromosome chr1
-
-...
-
-159 genes with 172 transcripts
-
-...
-
-Checked 41 chromosomes with 335 genes with 351 transcripts 
-
-Information about proteins (with their transcripts and genes) that are shorter
-than the cut-off or contain stops is printed to STDOUT:
-
-ENSP00000211067 encoded by transcript ENST00000211067 from gene ENSG00000095908 on chromosome chrNA_random has length:  4
-
-...
-
-ENSP00000215395 encoded by transcript ENST00000215395 from gene ENSG00000099644 on chromosome chrUL_random contains '*' character(s).
-TPSQSEDLRACFEQNKFQGIATRDGLALAIGFLEPIVQNWFQNERSRQVRQHCRESRPRPGRHGPQEGR*KRTAVTGSQTALLLRAFEKDRFPGIAAREDLAR*TGLPGSRIQIRFQNRRARHLGEAGRAPAKAGSRYNAAP  
-
-
-=head1 OPTIONS
-
--host       db server name (default localhost)
-
--port       port to connect to (default 3306)
-
--dbname     name of the database to use (default ensembl100)
-
--driver     database driver (default mysql)
-
--user       username for database access (default ensro)
-
--pass       password for database access (default undef)
-
--minlength  minimal length of the protein to be considered OK (default 2)
-
--h|help     print out help (this text)
-
-
-=head1 WARNING
-
-This script takes quite a long time to run...
-
-=head1 WARNING
-
-On big chromosomes (like chr1) one is likely to run out of memory
-(that is what happened at least on ecs1c)
-
-=cut
-
-use strict;
-use Bio::EnsEMBL::DBSQL::DBAdaptor;
-use Getopt::Long; 
-
-# Connection and threshold defaults. Every one of them can be overridden
-# from the command line through the options parsed below.
-my ( $host, $port, $driver ) = ( 'localhost', 3306, 'mysql' );
-my ( $dbname, $user, $pass ) = ( 'ensembl100', 'ensro', undef );
-my $minlength = 2;    # translations shorter than this are reported
-my $help;
-
-# In the specs below ":s" / ":n" declare an optional string / numeric value.
-GetOptions(
-    'host:s'      => \$host,
-    'port:n'      => \$port,
-    'dbname:s'    => \$dbname,
-    'user:s'      => \$user,
-    'pass:s'      => \$pass,
-    'driver:s'    => \$driver,
-    'minlength:n' => \$minlength,
-    'h|help'      => \$help,
-);
-
-# -h or -help: hand the process over to perldoc to display this file's POD.
-exec( 'perldoc', $0 ) if $help;
-
-# Get db adaptor for the database selected by the settings above.
-my @db_args = (
-    -user   => $user,
-    -dbname => $dbname,
-    -host   => $host,
-    -driver => $driver,
-    -port   => $port,
-    -pass   => $pass,
-);
-
-my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(@db_args);
-
-
-# Chromosomes to check: the names given on the command line or, when there
-# are none, every distinct chr_name found in static_golden_path.
-my @chromosomes = @ARGV;
-unless (@chromosomes)
-{
-    my $name_query = $db->prepare
-    (
-        "select distinct(chr_name) from static_golden_path"
-    );
-    my $executed = $name_query->execute();
-
-    # Each fetch() stores the next chr_name in the bound variable.
-    my $chr_name;
-    $name_query->bind_columns(undef, \$chr_name);
-    while ($name_query->fetch)
-    {
-        push @chromosomes, $chr_name;
-    }
-
-    # An empty result leaves nothing to check, so give up here.
-    die "Could not get chromosome names from static_golden_path table.\n"
-        unless @chromosomes;
-}
-
-
-# Get static golden path adaptor
-my $stadp = $db->get_StaticGoldenPathAdaptor();
-
-# Walk chromosome -> gene -> transcript -> translation and print to STDOUT
-# every translation shorter than $minlength or containing a stop ('*');
-# progress and totals go to STDERR. All counters start at 0 so the summary
-# lines are correct, and free of "uninitialized value" warnings under -w,
-# when a chromosome has no genes or nothing is found at all.
-my($genecount, $transcriptcount) = (0, 0);
-foreach my $chromosome (@chromosomes)
-{
-    my($chr_genecount, $chr_transcriptcount) = (0, 0);
-    print STDERR "Building virtual contig for chromosome $chromosome\n";
-    my $vc = $stadp->fetch_VirtualContig_by_chr_name($chromosome);
-    print STDERR "Checking genes on chromosome $chromosome\n";
-    foreach my $gene ($vc->get_all_Genes)
-    {
-        $genecount++;
-        $chr_genecount++;
-        foreach my $transcript ($gene->each_Transcript)
-        {
-            $transcriptcount++;
-            $chr_transcriptcount++;
-            my $pep = $transcript->translate;
-            my $seq = $pep->seq;    # fetched once, used by both checks
-            my $length = length($seq);
-
-            # Too short: strictly below the cut-off.
-            if($length < $minlength)
-            {
-                print $pep->id,
-                      " encoded by transcript ", $transcript->id,
-                      " from gene ", $gene->id,
-                      " on chromosome ", $chromosome,
-                      " has length:\t", $length, "\n";
-            }
-
-            # Contains a stop: report it followed by the full sequence.
-            if($seq =~ /\*/)
-            {
-                print $pep->id,
-                      " encoded by transcript ", $transcript->id,
-                      " from gene ", $gene->id,
-                      " on chromosome ", $chromosome,
-                      " contains '*' character(s).\n", $seq, "\n";
-            }
-        } #  foreach my $transcript ($gene->each_Transcript)
-        # Empty the gene's hashes (see deleteObj) before moving on.
-        deleteObj($gene);
-    } # foreach my $gene ($vc->get_all_Genes)
-    print STDERR "$chr_genecount genes with $chr_transcriptcount transcripts\n";
-} # foreach my $chromosome (@chromosomes)
-
-print STDERR "Checked ", scalar(@chromosomes), " chromosomes with $genecount genes with $transcriptcount transcripts\n";
-
-# Recursively empty a hash-based object and every hash reachable from it.
-# Anything that is not a hash reference (string, number, undef, array) is
-# skipped, so walking into an object's plain fields no longer dies.
-sub deleteObj {
-  my $self = shift;
-  return unless ref($self) && UNIVERSAL::isa($self, 'HASH'); # blessed hashes too
-  my @dummy = values %{$self};
-  %{$self} = ();    # emptied before recursing, so cyclic references terminate
-  deleteObj($_) foreach @dummy;
-}
diff --git a/systemtests/stops_and_lengths_by_fpc.pl b/systemtests/stops_and_lengths_by_fpc.pl
deleted file mode 100755
index 90908c5b1c..0000000000
--- a/systemtests/stops_and_lengths_by_fpc.pl
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/local/bin/perl -w
-
-=head1 NAME
-
-Another data integrity test
-
-=head1 SYNOPSIS
-
-This script has 2 purposes:
-- check that the length of each protein is not shorter than cut-off
-given by user (default 2).
-- check that protein sequence does not contain stops ('*').
-
-How it all happens:
-- get the list of fpcs
-- build virtual contigs for each fpc
-- get genes on each virtual contig
-- get transcripts for each gene
-- get translation of each transcript
-- check the length of the translation and the presence of '*' characters.
-
-Output is divided to STDERR and STDOUT
-
-STDERR gets messages like:
-Now checking genes on fpc ctg12323
-22 genes with 30 transcripts 
-
-
-Information about proteins (with their transcripts and genes) that are shorter
-than the cut-off or contain stops is printed to STDOUT:
-
-ENSP00000228176 encoded by transcript ENST00000228176 from gene ENSG00000110736 on fpc ctg12269 contains '*' character(s).
-VKRAYLVHSAYDQSYNFIYKSFRIASII*X  
-
-...
-
-ENSP00000227580 encoded by transcript ENST00000227580 from gene ENSG00000110163 on fpc ctg12475 has length:     1  
-
-
-
-=head1 OPTIONS
-
--host       db server name (default ecs1b.sanger.ac.uk)
-
--port       port to connect to (default 3306)
-
--dbname     name of the database to use (default ensembl100)
-
--driver     database driver (default mysql)
-
--user       username for database access (default ensro)
-
--pass       password for database access (default undef)
-
--minlength  minimal length of the protein to be considered OK (default 2)
-
--h|help     print out help (this text)
-
-
-=head1 WARNING
-
-This script takes quite a long time to run...
-
-=cut
-
-use strict;
-use Bio::EnsEMBL::DBSQL::DBAdaptor;
-use Getopt::Long; 
-
-# Connection and threshold defaults. Every one of them can be overridden
-# from the command line through the options parsed below.
-my ( $host, $port, $driver ) = ( 'ecs1b.sanger.ac.uk', 3306, 'mysql' );
-my ( $dbname, $user, $pass ) = ( 'ensembl100', 'ensro', undef );
-my $minlength = 2;    # translations shorter than this are reported
-my $help;
-
-# In the specs below ":s" / ":n" declare an optional string / numeric value.
-GetOptions(
-    'host:s'      => \$host,
-    'port:n'      => \$port,
-    'dbname:s'    => \$dbname,
-    'user:s'      => \$user,
-    'pass:s'      => \$pass,
-    'driver:s'    => \$driver,
-    'minlength:n' => \$minlength,
-    'h|help'      => \$help,
-);
-
-# -h or -help: hand the process over to perldoc to display this file's POD.
-exec( 'perldoc', $0 ) if $help;
-
-# Get db adaptor for the database selected by the settings above.
-my @db_args = (
-    -user   => $user,
-    -dbname => $dbname,
-    -host   => $host,
-    -driver => $driver,
-    -port   => $port,
-    -pass   => $pass,
-);
-
-my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(@db_args);
-
-
-# FPC contigs to check: the names given on the command line or, when there
-# are none, every distinct fpcctg_name found in static_golden_path. An
-# empty result is not treated as an error here.
-my @fpcs = @ARGV;
-unless (@fpcs)
-{
-    my $name_query = $db->prepare
-    (
-        "select distinct(fpcctg_name) from static_golden_path"
-    );
-    my $executed = $name_query->execute();
-
-    # Each fetch() stores the next fpcctg_name in the bound variable.
-    my $fpc_name;
-    $name_query->bind_columns(undef, \$fpc_name);
-    while ($name_query->fetch)
-    {
-        push @fpcs, $fpc_name;
-    }
-}
-
-
-# Get static golden path adaptor
-my $stadp = $db->get_StaticGoldenPathAdaptor();
-
-# For every fpc: build its virtual contig, translate each transcript of each
-# gene and print to STDOUT the translations that are shorter than $minlength
-# or contain a stop ('*'). Progress and totals go to STDERR.
-my($genecount, $transcriptcount, $counter) = (0, 0, 0); # $counter is unused
-foreach my $fpc (@fpcs)
-{
-    my $fpc_genecount = 0;
-    my $fpc_transcriptcount = 0;
-    print STDERR "Building virtual contig for fpc $fpc\n";
-    my $vc = $stadp->fetch_VirtualContig_by_fpc_name($fpc);
-    print STDERR "Now checking genes on fpc $fpc\n";
-    foreach my $gene ($vc->get_all_Genes)
-    {
-        $genecount += 1;
-        $fpc_genecount += 1;
-        foreach my $transcript ($gene->each_Transcript)
-        {
-            $transcriptcount += 1;
-            $fpc_transcriptcount += 1;
-            my $pep = $transcript->translate;
-            my $length = length($pep->seq);
-
-            # Too short: strictly below the cut-off.
-            if($length < $minlength)
-            {
-                print $pep->id,
-                      " encoded by transcript ", $transcript->id,
-                      " from gene ", $gene->id,
-                      " on fpc ", $fpc,
-                      " has length:\t", $length, "\n";
-            }
-
-            # Contains a stop: report it followed by the full sequence.
-            if($pep->seq =~ /\*/)
-            {
-                print $pep->id,
-                      " encoded by transcript ", $transcript->id,
-                      " from gene ", $gene->id,
-                      " on fpc ", $fpc,
-                      " contains '*' character(s).\n", $pep->seq, "\n";
-            }
-            # Empty the per-transcript objects once they have been checked.
-            deleteObj($pep);
-            deleteObj($transcript);
-        } #  foreach my $transcript ($gene->each_Transcript)
-        deleteObj($gene);
-    } # foreach my $gene ($vc->get_all_Genes)
-    print STDERR "$fpc_genecount genes with $fpc_transcriptcount transcripts\n";
-    # $fpc is a plain name string here, so this call has nothing to empty.
-    deleteObj($fpc);
-} # foreach my $fpc (@fpcs)
-
-print STDERR "Checked ", scalar(@fpcs), " fpcs with $genecount genes with $transcriptcount transcripts\n";
-
-
-# Best-effort recursive teardown: empty the hash behind the argument and
-# then do the same for every value it held. The eval discards the error
-# raised when the argument cannot be dereferenced as a hash, so strings,
-# undef and arrays are simply left untouched.
-sub deleteObj {
-  my ($node) = @_;
-  eval {
-      my @children = values %$node;
-      delete @{$node}{ keys %$node };    # emptied before recursing
-      deleteObj($_) foreach @children;
-  };
-}
-
-
-
-
-- 
GitLab