From 4c7155ffadb488599c0efead44a57fd51fd8f50c Mon Sep 17 00:00:00 2001 From: Rhoda Kinsella <rhoda@ebi.ac.uk> Date: Tue, 2 Feb 2010 10:33:03 +0000 Subject: [PATCH] Added code to delete names/GO terms if the -delete_only option is specified. Added code to check whether -registryconf is submitted as a string from the submit_projections.pl script or as a file on the command line. --- .../xref_projection/project_display_xrefs.pl | 71 ++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/misc-scripts/xref_projection/project_display_xrefs.pl b/misc-scripts/xref_projection/project_display_xrefs.pl index 10d2d9a59d..bc2624ef82 100644 --- a/misc-scripts/xref_projection/project_display_xrefs.pl +++ b/misc-scripts/xref_projection/project_display_xrefs.pl @@ -4,6 +4,7 @@ use strict; # on their orthologs in the "from" database. Can also project GO xrefs. # Orthology relationships are read from a Compara database. +use Data::Dumper; use Getopt::Long; use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::DBSQL::DBAdaptor; @@ -12,14 +13,10 @@ use Bio::EnsEMBL::Utils::Eprof qw(eprof_start eprof_end eprof_dump); my $method_link_type = "ENSEMBL_ORTHOLOGUES"; -my ($conf, $registryconf, $host, $user, $port, $pass, $version, $compara, $from_species, @to_multi, $print, $names, $go_terms, $delete_names, $delete_go_terms, $no_backup, $full_stats, $descriptions, $release, $no_database, $quiet, $max_genes, $one_to_many, $go_check, $all_sources); +my ($conf, $registryconf, $version, $compara, $from_species, @to_multi, $print, $names, $go_terms, $delete_names, $delete_go_terms, $no_backup, $full_stats, $descriptions, $release, $no_database, $quiet, $max_genes, $one_to_many, $go_check, $all_sources, $delete_only); GetOptions('conf=s' => \$conf, 'registryconf=s' => \$registryconf, - 'host=s' => \$host, - 'user=s' => \$user, - 'port=s' => \$port, - 'pass=s' => \$pass, 'version=i' => \$version, 'compara=s' => \$compara, 'from=s' => \$from_species, @@ -40,19 +37,20 @@ GetOptions('conf=s' => \$conf, 'one_to_many' => \$one_to_many, 'go_check' => \$go_check, 'all_sources' => \$all_sources, + 'delete_only' => \$delete_only, 'help' => sub { usage(); exit(0); }); $| = 1; # auto flush stdout $descriptions = 1; -if (!$conf && !$registryconf) { +if (!$conf && !$registryconf && !$delete_only) { print STDERR "Configuration file must be supplied via -conf or -registryconf argument\n"; usage(); exit(1); -} elsif (!$from_species) { +} elsif (!$from_species && !$delete_only) { print STDERR "From species must be supplied via -from argument\n"; usage(); @@ -72,9 +70,9 @@ if (!$conf && !$registryconf) { } -if (!$go_terms && !$names) { +if (!$go_terms && !$names && !$delete_only) { - print STDERR "One or both of --names or --go_terms must be specified\n"; + print STDERR "One or both of --names or --go_terms must be specified unless only -delete_only is being used\n"; print STDERR "Use --help for more detailed usage informaion\n"; exit(1); @@ -106,11 +104,43 @@ my @evidence_codes = ( "IDA", "IEP", "IGI", "IMP", "IPI" ); # load from database and conf file Bio::EnsEMBL::Registry->no_version_check(1); -my $args = eval($registryconf); + +# Registryconf is either the registry configuration passed from the submit_projections.pl +# script or a file name containing the same information that is passed on the command line. + +my $args; + +if (defined($registryconf)) { + if (-f $registryconf) { + open(CONF, $registryconf); + my @contents = <CONF>; + $args = eval(join("\n", @contents)); + close(CONF); + } else { + $args = eval($registryconf); + } +} + + Bio::EnsEMBL::Registry->load_registry_from_multiple_dbs(@{$args}); -Bio::EnsEMBL::Registry->load_all($conf, 0, 1); # options mean "not verbose" and "don't clear registry" +Bio::EnsEMBL::Registry->load_all($conf, 1, 1); # options mean "not verbose" and "don't clear registry" + +# only delete names/GO terms if -delete_only has been specified +if ($delete_only) { + + print "Just deleting, no projection\n"; + foreach my $to_species (@to_multi) { + + my $to_ga = Bio::EnsEMBL::Registry->get_adaptor($to_species, 'core', 'Gene'); + die("Can't get gene adaptor for $to_species - check database connection details; make sure meta table contains the correct species alias\n") if (!$to_ga); + delete_names($to_ga) if ($delete_names); + delete_go_terms($to_ga) if ($delete_go_terms); + } + + exit(0); +} # Get Compara adaptors - use the one specified on the command line, or the first one # defined in the registry file if not specified @@ -290,7 +320,7 @@ sub project_display_names { my @to_transcripts = @{$to_gene->get_all_Transcripts}; my $to_transcript = $to_transcripts[0]; - my $dbname = $dbEntry->dbname(); + $dbname = $dbEntry->dbname(); my $type = $db_to_type{$dbname}; @@ -806,10 +836,16 @@ sub usage { if defined, if not ~/.ensembl_init will be used. Note only the Compara database needs to be defined here, assuming the rest of the databases are on the server - defined by --host etc + defined by --registryconf + + + --registryconf There are two ways in which the registry configuration + information can be passed to the script. This information + is a hash that encodes the registry configuration parameters + and can be passed as a string in a file or as a string on the + commandline. + - --host, --port, Database connection details. - --user, --pass, --version Note that a combination of the host/user and conf files @@ -867,9 +903,12 @@ sub usage { [--help] This text. + Note that projected names or GO terms can be deleted from a database without doing any subsequent + projection by specifying only the -to, -delete_only and -delete_go_terms or -delete_names options. + e.g - perl project_display_xrefs.pl --conf compara_only.ini --host ens-staging -user ensadmin -pass PASS -version 47 -names -delete_names -from human -to dog -nobackup -no_database + perl project_display_xrefs.pl --conf compara_only.ini --host HOST -user USER -pass PASS -version 47 -names -delete_names -from human -to dog -nobackup -no_database EOF -- GitLab