From c58ddf0487586c64ff01adee161ba090578a4f80 Mon Sep 17 00:00:00 2001 From: ens-carlos <cgg@sanger.ac.uk> Date: Fri, 11 Oct 2013 10:15:52 +0000 Subject: [PATCH] Moved from sf7 personal directory. --- .../assembly_patches/run_patch_mapping.pl | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100755 misc-scripts/assembly_patches/run_patch_mapping.pl diff --git a/misc-scripts/assembly_patches/run_patch_mapping.pl b/misc-scripts/assembly_patches/run_patch_mapping.pl new file mode 100755 index 0000000000..4f39e695e1 --- /dev/null +++ b/misc-scripts/assembly_patches/run_patch_mapping.pl @@ -0,0 +1,219 @@ +#!/usr/local/ensembl/bin/perl + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; +use Bio::EnsEMBL::Utils::Exception qw(throw warning); +use Bio::EnsEMBL::Slice; +use Bio::EnsEMBL::DBSQL::DBAdaptor; +use Bio::EnsEMBL::DBSQL::SliceAdaptor; +use Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor; +use Bio::EnsEMBL::AssemblyExceptionFeature; + +$| = 1; + +my $host = ''; +my $user = ''; +my $pass = ''; +my $port = ''; +my $dbname = ''; +my @patch_types = ('PATCH_FIX','PATCH_NOVEL'); +my $align_by_component; +my $align_non_ident; +my $fix_overlap; +my $new_align_session; +my $check_repeat_masked; +my $out_dir; +my $perl; +my $pipe_path; +my $script; +my $assembly; + +&GetOptions( 'dbhost:s' => \$host, + 'dbuser:s' => \$user, + 'dbname:s' => \$dbname, + 'dbpass:s' => \$pass, + 'dbport:n' => \$port, + 'align_by_component' => \$align_by_component, + 'align_non_ident' => \$align_non_ident, + 'new_align_session' => \$new_align_session, + 'check_repeat_masked' => \$check_repeat_masked, + 'fix_overlap' => \$fix_overlap, + 'out_dir:s' => \$out_dir, + 'perl:s' => \$perl, + 'pipeline_path:s' => \$pipe_path, + 'assembly:s' => \$assembly); + +my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -dbname => $dbname ); + +if((!($align_by_component or $align_non_ident or $fix_overlap or $new_align_session or $check_repeat_masked))){ + throw("Must specify one of -align_by_component, -align_non_ident, -new_align_session, -check_repeat_masked or -fix_overlap.\n"); +} +if((!$out_dir) or (!(-d $out_dir))){ + throw("Must specify a valid output directory.\n"); +} +if(!$perl){ + throw("Must specify perl path.\n"); +} +if(!$pipe_path){ + throw("Must specify path to ensembl-pipeline.\n"); +} +if(!$assembly){ + throw("Must specify assembly i.e. GRCh37.\n"); +} +if($align_by_component){ + $script = $pipe_path."/scripts/Finished/assembly/align_by_component_identity.pl"; + if(! -e $script){ + throw("Can't find script: ".$script."\n"); + } +} +elsif($align_non_ident){ + $script = $pipe_path."/scripts/Finished/assembly/align_nonident.pl"; + if(! -e $script){ + throw("Can't find script: ".$script."\n"); + } +} +elsif($new_align_session){ + $script = $pipe_path."/scripts/Finished/assembly/new_align_session.pl"; + if(! -e $script){ + throw("Can't find script: ".$script."\n"); + } +} +elsif($check_repeat_masked){ + $script = $pipe_path."/scripts/Finished/assembly/check_repeat_masked.pl"; + if(! -e $script){ + throw("Can't find script: ".$script."\n"); + } +} +elsif($fix_overlap){ + $script = $pipe_path."/scripts/Finished/assembly/fix_overlaps.pl"; + if(! -e $script){ + throw("Can't find script: ".$script."\n"); + } +} + +#get the patches +print "Getting patches...\n"; +my $asm_exc_adaptor = $db->get_AssemblyExceptionFeatureAdaptor(); +my @exceptions = @{$asm_exc_adaptor->fetch_all()}; +my @patches; +EXC: foreach my $exc (@exceptions){ + foreach my $type (@patch_types){ + if($exc->type() =~ m/$type/){ + push(@patches, $exc); + next EXC; + } + } +} +#Assuming that AssemblyExceptionFeatureAdaptor's fetch_all will always return two +#entries for each patch and that those two entries are adjacent +my $num_patches = scalar(@patches)/2; +print "Have ".$num_patches." patches.\n"; + +my $command = ""; +my @ref_chroms; +my @patch_chroms; + +#for each patch +for (my $i = 0;$i < $num_patches;$i++){ + + #get the two slices + my $ref_slice; + my $patch_slice; + for(my $j = 0;$j < 2; $j++){ + my $exc = pop(@patches); + #if this is the ref version + if($exc->type =~ m/REF/){ + #alt is only the patch slice + $patch_slice = $exc->alternate_slice(); + } + else{ + #alt is replaced region of ref + $ref_slice = $exc->alternate_slice(); + } + } + if(!($patch_slice and $ref_slice)){ + throw("Something is wrong, the patch and ref slices were not set correctly.\n"); + } + + print "Reached ".$patch_slice->name()." and ".$ref_slice->name()."\n"; + + #running for all patches together below + if($align_by_component or $fix_overlap or $new_align_session or $check_repeat_masked){ + push @ref_chroms, $ref_slice->seq_region_name; + push @patch_chroms, $patch_slice->seq_region_name; + } + elsif($align_non_ident){ + #run for each patch + $command = "bsub -R'select[mem>4000] rusage[mem=4000]' -M4000000 -o ".$out_dir.$patch_slice->seq_region_name."_nonident.out -e ". + $out_dir.$patch_slice->seq_region_name."_nonident.err ". + $perl." ".$script." --dbhost ".$host." --dbport ".$port." --dbuser ".$user." --dbpass ".$pass." --dbname ".$dbname. + " --assembly ".$assembly." --altdbname ".$dbname." --altassembly ".$assembly." --chromosomes ".$ref_slice->seq_region_name. + " --altchromosomes ".$patch_slice->seq_region_name." --logpath ".$out_dir." --logfile ".$patch_slice->seq_region_name."_nonident.log --force-stage"; + + if ( ($patch_slice->seq_region_name eq 'HG1472_PATCH') or ($patch_slice->seq_region_name eq 'HG858_PATCH') ) { + $command = "bsub -R'select[mem>10000] rusage[mem=10000]' -M10000000 -o ".$out_dir.$patch_slice->seq_region_name."_nonident.out -e ". + $out_dir.$patch_slice->seq_region_name."_nonident.err ". + $perl." ".$script." --dbhost ".$host." --dbport ".$port." --dbuser ".$user." --dbpass ".$pass." --dbname ".$dbname. + " --assembly ".$assembly." --altdbname ".$dbname." --altassembly ".$assembly." --chromosomes ".$ref_slice->seq_region_name. + " --altchromosomes ".$patch_slice->seq_region_name." --logpath ".$out_dir." --logfile ".$patch_slice->seq_region_name."_nonident.log --force-stage"; + } + + print $command."\n"; + my $cmd_rtn = `$command`; + print $cmd_rtn."\n"; + } + +} + +if($align_by_component){ + + my $ref_chromosomes = join ",", @ref_chroms; + my $patch_chromosomes = join ",", @patch_chroms; + + $command = "bsub -R\"select[mem>3800] rusage[mem=3800]\" -M3800000 -o ".$out_dir."component_ident.out -e ".$out_dir."component_ident.err ". + $perl." ".$script." --host ".$host." --port ".$port." --user ".$user." --pass ".$pass." --dbname ".$dbname." --altdbname ".$dbname. + " --assembly ".$assembly." --altassembly ".$assembly." --chromosomes ".$ref_chromosomes. + " --altchromosomes ".$patch_chromosomes." --logpath ".$out_dir." --logfile component_ident.log"; + + print $command."\n"; + + my $cmd_rtn = `$command`; + print $cmd_rtn."\n"; +} +elsif($new_align_session){ + my $ref_chromosomes = join ",", @ref_chroms; + my $patch_chromosomes = join ",", @patch_chroms; + + $command = "bsub -o ".$out_dir."new_align_session.out -e ".$out_dir."new_align_session.err ". + $perl." ".$script." --host ".$host." --port ".$port." --user ".$user." --pass ".$pass." --dbname ".$dbname." --altdbname ".$dbname. + " --assembly ".$assembly." --altassembly ".$assembly." --chromosomes ".$ref_chromosomes. + " --altchromosomes ".$patch_chromosomes." --logpath ".$out_dir." --logfile new_align_session.log -author patch -nolog"; + + print $command."\n"; + + my $cmd_rtn = `$command`; + print $cmd_rtn."\n"; + +} +elsif($fix_overlap){ + + my $ref_chromosomes = join ",", @ref_chroms; + my $patch_chromosomes = join ",", @patch_chroms; + + $command = "bsub -o ".$out_dir."fix_overlap.out -e ".$out_dir."fix_overlap.err ". + $perl." ".$script." --dbhost ".$host." --dbport ".$port." --dbuser ".$user." --dbname ".$dbname." --altdbname ".$dbname. + " --dbpass ".$pass." --assembly ".$assembly." --altassembly ".$assembly." --chromosomes ".$ref_chromosomes." --altchromosomes ". + $patch_chromosomes." --logpath ".$out_dir." --logfile fix_overlap.log --force-stage"; + + print $command."\n"; + + my $cmd_rtn = `$command`; + print $cmd_rtn."\n"; +} +print "Completed\n"; -- GitLab