#!/usr/bin/env perl
# Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Don't change the above line.
# Change the PATH in the myRun.ksh script if you want to use another perl.

=head1 NAME

fake_stable_id_mapping.pl - fix the stable ID archive for a database after some
genes were deleted

=head1 SYNOPSIS

fake_stable_id_mapping.pl [options]

Options:

    --conffile, --conf=FILE             read parameters from FILE
                                        (default: conf/Conversion.ini)

    --dbname, --db_name=NAME            use new database NAME
    --host, --dbhost, --db_host=HOST    use new database host HOST
    --port, --dbport, --db_port=PORT    use new database port PORT
    --user, --dbuser, --db_user=USER    use new database username USER
    --pass, --dbpass, --db_pass=PASS    use new database password PASS
    --altdbname=NAME                    use old database NAME
    --althost=HOST                      use old database host HOST
    --altport=PORT                      use old database port PORT
    --altuser=USER                      use old database username USER
    --altpass=PASS                      use old database password PASS

    --gene_stable_id_file|gsi_file|gsi=FILE
45 46 47
                                        (deprecated) the path of the file
                                        containing a list of gene stable Ids
                                        that were deleted
48
    --transcript_stable_id_file|tsi_file|tsi=FILE    
49 50 51
                                        (deprecated, optional) the path of the
                                        file containing a list of transcript
                                        stable Ids that were deleted
52
    --skip_ncrna|skip_ncRNA|skip_nc=0|1 (optionally) skip ncRNAs
53
    --skip_biotypes=LIST                (optionally) skip LISTed biotypes
54 55 56 57 58 59 60 61 62 63 64 65

    --logfile, --log=FILE               log to FILE (default: *STDOUT)
    --logpath=PATH                      write logfile to PATH (default: .)
    --logappend, --log_append           append to logfile (default: truncate)
    -v, --verbose                       verbose logging (default: false)
    -i, --interactive=0|1               run script interactively (default: true)
    -n, --dry_run, --dry=0|1            don't write results to database
    -h, --help, -?                      print help (this message)

=head1 DESCRIPTION

This script fakes a stable ID archive run for a database where some genes were
66 67 68 69 70 71 72 73 74 75
deleted.

It assumes that the new database already has its *_stable_id tables populated.
A new mapping session is created and all stable IDs other than the deleted ones
are mapped to themselves. For the deleted genes, appropriate entries in
gene_archive and peptide_archive are created. All this is done to the new
database, whereas stable Ids of deleted objects are looked up in the old
database. The script also increments the stable ID versions of genes where
transcripts were deleted (but the gene is still there due to other retained
transcripts).
76 77

Please note that when using two different databases as input and one is from
78
the last release, you might have to patch it to the current schema.
79 80 81 82 83 84 85 86


=head1 AUTHOR

Patrick Meidl <pm2@sanger.ac.uk>

=head1 CONTACT

Magali Ruffier's avatar
Magali Ruffier committed
87
Post questions to the EnsEMBL development list http://lists.ensembl.org/mailman/listinfo/dev
88 89 90 91 92 93 94 95 96

=cut

use strict;
use warnings;
no warnings 'uninitialized';

use Getopt::Long;
use Pod::Usage;
97
use FindBin qw($Bin);
98 99 100 101 102
use Bio::EnsEMBL::Utils::ConversionSupport;
use Digest::MD5 qw(md5_hex);

# Unbuffered STDOUT so prompts and progress output appear immediately.
$| = 1;

# The support object is constructed with the path three levels above the
# directory this script lives in.
my $support = Bio::EnsEMBL::Utils::ConversionSupport->new("$Bin/../../..");

# parse options
$support->parse_common_options(@_);
$support->parse_extra_options(
  'althost=s',
  'altport=n',
  'altuser=s',
  'altpass=s',
  'altdbname=s',
  'gene_stable_id_file|gsi_file|gsi=s',
  'transcript_stable_id_file|tsi_file|tsi=s',
  'skip_ncrna|skip_ncRNA|skip_nc=s',
  'skip_biotypes=s@'
);
$support->allowed_params(
  $support->get_common_params,
  'althost',
  'altport',
  'altuser',
  'altpass',
  'altdbname',
  'gene_stable_id_file',
  'transcript_stable_id_file',
  'skip_ncrna',
  'skip_biotypes',
);

if ($support->param('help') or $support->error) {
    warn $support->error if $support->error;
    pod2usage(1);
}

$support->comma_to_list('skip_biotypes');

# ask user to confirm parameters to proceed
$support->confirm_params;

# get log filehandle and print heading and parameters to logfile
$support->init_log;

$support->check_required_params(
  'althost',
  'altport',
  'altuser',
  'altdbname',
);

# connect to database and get adaptors
# NOTE(review): the second argument 'alt' presumably selects the alt*
# connection parameters (i.e. the old database) - confirm against
# ConversionSupport::get_database.
my $dba_new = $support->get_database('core');
my $dba_old = $support->get_database('ensembl', 'alt');
my $dbh_new = $dba_new->dbc->db_handle;

# define some globally used variables
#
# All of these are filled by parse_deleted_files() or determine_deleted() and
# read by the later steps.
my %genes = ();         # gene stable ID => old-db gene (deleted or modified)
my %genes_mod = ();     # gene stable ID => old-db gene that lost transcripts
my %gsi_del = ();       # stable IDs of deleted genes
my %gsi_mod = ();       # stable IDs of genes that lost transcripts
my %tsi_del = ();       # stable IDs of deleted transcripts
my %tlsi_del = ();      # stable IDs of deleted translations
my $gsi_string;         # quoted, comma-separated keys of %gsi_del (SQL IN list)
my $gsi_mod_string;     # same for %gsi_mod
my $tsi_string;         # same for %tsi_del
my $tlsi_string;        # same for %tlsi_del
my %skip_biotypes = (); # biotype => 1 for biotypes to be ignored

#
# find out which genes and transcripts were deleted
#
if ($support->param('gene_stable_id_file') or
    $support->param('transcript_stable_id_file')) {

  # read list of deleted gene and transcript stable IDs from file(s)
  # this is error-prone and therefore DEPRECATED!
  parse_deleted_files();

} else {
  # infer deleted objects from dbs (more robust)
  determine_deleted();
}

# create new mapping session
my $mapping_session_id = create_mapping_session();

# create stable_id_event entries for all objects, mapping to themselves
create_stable_id_events();

# increment gene version for all genes where transcripts were deleted
increment_gene_versions();

# set stable_id_event.new_stable_id to NULL for deleted objects
mark_deleted();

# populate gene_archive and peptide_archive
populated_archive();

# finish logfile
$support->finish_log;


### END main ###
204

205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221

=head2 parse_deleted_files

  Example     : &parse_deleted_files;
  Description : DEPRECATED
                Read list of deleted gene and transcript stable IDs from file(s).
                Note that this method of determining which objects were deleted
                is now DEPRECATED (because it was error-prone when dealing with 
                both whole gene and individual transcript deletions).
  Return type : none
  Exceptions  : thrown on missing files
  Caller      : main()
  Status      : At Risk
              : under development

=cut

222
sub parse_deleted_files {

  # DEPRECATED way of finding deleted objects: reads gene and/or transcript
  # stable IDs from the files named by the gene_stable_id_file and
  # transcript_stable_id_file parameters, looks the objects up in the old db
  # and fills %genes, %genes_mod, %gsi_del, %gsi_mod, %tsi_del, %tlsi_del and
  # the corresponding SQL IN-list strings. Takes no arguments, returns nothing.

  $support->log_warning("DEPRECATED. Don't use stable ID files (this is error-prone), rather let the script determine which objects were deleted from the dbs.\n");

  my $ta = $dba_old->get_TranscriptAdaptor;
  my $ga = $dba_old->get_GeneAdaptor;

  #
  # read list of deleted gene_stable_ids from file
  #
  # The caller enters this sub when *either* file parameter is set, so the
  # gene file may legitimately be absent.
  if ($support->param('gene_stable_id_file')) {

    $support->log_stamped("Reading list of deleted gene_stable_ids from file, and fetching associated transcript and translation stable IDs from the db...\n");
    my $gfh = $support->filehandle('<', $support->param('gene_stable_id_file'));

    while (my $g = <$gfh>) {
      chomp $g;

      # tolerate surrounding whitespace (e.g. CRLF files) and blank lines
      $g =~ s/^\s+|\s+$//g;
      next unless (length($g));

      my $gene = $ga->fetch_by_stable_id($g);

      # don't call methods on a missing object
      unless ($gene) {
        $support->log_warning("Gene $g not found in old db, skipping.\n", 1);
        next;
      }

      $genes{$g} = $gene;
      $gsi_del{$g} = 1;

      # fetch associated transcript and translation stable IDs from the old db
      foreach my $transcript (@{ $gene->get_all_Transcripts }) {
        $tsi_del{$transcript->stable_id} = 1;
        my $tl = $transcript->translation;
        $tlsi_del{$tl->stable_id} = 1 if ($tl);
      }
    }

    close($gfh);
  }

  #
  # read list of deleted transcript_stable_ids from file
  #
  if ($support->param('transcript_stable_id_file')) {

    $support->log_stamped("Reading list of deleted transcript_stable_ids from file, and fetching associated translation stable IDs from the db...\n");

    my $tfh = $support->filehandle('<', $support->param('transcript_stable_id_file'));

    while (my $t = <$tfh>) {
      chomp $t;

      # tolerate surrounding whitespace (e.g. CRLF files) and blank lines
      $t =~ s/^\s+|\s+$//g;
      next unless (length($t));

      my $transcript = $ta->fetch_by_stable_id($t);

      # don't call methods on a missing object
      unless ($transcript) {
        $support->log_warning("Transcript $t not found in old db, skipping.\n", 1);
        next;
      }

      # skip non-protein-coding transcripts
      unless ($transcript->biotype eq 'protein_coding') {
        $support->log_warning("Transcript ".$transcript->stable_id." is non-protein_coding, skipping.\n", 1);
        next;
      }

      $tsi_del{$transcript->stable_id} = 1;
      my $tl = $transcript->translation;
      $tlsi_del{$tl->stable_id} = 1 if ($tl);

      my $gene = $ga->fetch_by_transcript_id($transcript->dbID);
      unless ($gene) {
        $support->log_warning("No gene found for transcript $t in old db.\n", 1);
        next;
      }

      $genes{$gene->stable_id} = $gene;
      $gsi_mod{$gene->stable_id} = 1;

      # increment_gene_versions() iterates over %genes_mod to update
      # gene_stable_id, so modified genes must be recorded here as well
      # (determine_deleted() does the same).
      $genes_mod{$gene->stable_id} = $gene;
    }

    close($tfh);
  }

  # create stable ID strings for use in mysql IN statements
  $gsi_string = "'".join("', '", keys(%gsi_del))."'";
  $gsi_mod_string = "'".join("', '", keys(%gsi_mod))."'";
  $tsi_string = "'".join("', '", keys(%tsi_del))."'";
  $tlsi_string = "'".join("', '", keys(%tlsi_del))."'";

  $support->log_stamped("Done loading ".scalar(keys(%gsi_del))." gene, ".scalar(keys(%tsi_del))." transcript and ".scalar(keys(%tlsi_del))." translation stable IDs.\n\n");

}


289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
=head2 determine_deleted

  Example     : &determine_deleted;
  Description : Infer deleted genes/transcripts from dbs by comparing which
                objects are in the old and new db.
  Return type : none
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

sub determine_deleted {

  # Compares the genes/transcripts of the old db with those of the new db and
  # records everything that is missing from the new db in %gsi_del, %tsi_del
  # and %tlsi_del. Genes that lost at least one transcript are recorded in
  # %gsi_mod / %genes_mod. Also fills %genes, %skip_biotypes and the SQL
  # IN-list strings. Takes no arguments, returns nothing; exits the script if
  # the user declines to continue after the biotype check.

  $support->log_stamped("Determining list of deleted gene, transcript and translation stable IDs by comparing dbs...\n");

  # optionally skip ncRNAs
  #
  # this is the complete list of ncRNA biotype; you might need to update it (the
  # code below will try to help you with this)
  my @nc_biotypes = qw(
    miRNA
    miRNA_pseudogene
    misc_RNA
    misc_RNA_pseudogene
    Mt_rRNA
    Mt_tRNA
    Mt_tRNA_pseudogene
    rRNA
    rRNA_pseudogene
    scRNA
    scRNA_pseudogene
    snoRNA
    snoRNA_pseudogene
    snRNA
    snRNA_pseudogene
    tRNA_pseudogene
  );

  if ($support->param('skip_ncrna')) {

    %skip_biotypes = map { $_ => 1 } @nc_biotypes;

    # make sure we have a complete list of ncRNA biotypes
    my $sql = qq(SELECT DISTINCT biotype from gene);
    my $sth1 = $dbh_new->prepare($sql);
    $sth1->execute;
    my @biotypes_db;

    while ((my $biotype) = $sth1->fetchrow_array) {
      push @biotypes_db, $biotype unless ($skip_biotypes{$biotype});
    }

    $sth1->finish;

    if (@biotypes_db) {
      print "These are the non-ncRNA biotypes found in the db:\n";
      foreach my $biotype (@biotypes_db) {
        print "  $biotype\n";
      }
      print "\nPlease check that the list of ncRNA biotypes is still complete, otherwise adapt the script.\n";
      exit unless $support->user_proceed("Continue?");
    }
  }

  # optionally skip other biotypes
  #
  # These are added to (not substituted for) the ncRNA list above, so that
  # --skip_ncrna and --skip_biotypes can be combined.
  if ($support->param('skip_biotypes')) {
    $skip_biotypes{$_} = 1 foreach ($support->param('skip_biotypes'));
  }

  # get old and new genes and transcripts from db
  my $ga_old = $dba_old->get_GeneAdaptor;
  my @genes_old = @{ $ga_old->fetch_all };

  my $ga_new = $dba_new->get_GeneAdaptor;
  my %genes_new = map { $_->stable_id => $_ } @{ $ga_new->fetch_all };
  my $ta_new = $dba_new->get_TranscriptAdaptor;
  my %tsi_new = map { $_ => 1 } @{ $ta_new->list_stable_ids };

  # the list is consumed with shift() so that it shrinks while we go
  while (my $g_old = shift(@genes_old)) {

    # skip biotypes
    next if ($skip_biotypes{$g_old->biotype});

    my $gsi = $g_old->stable_id;

    # mark gene as deleted
    unless ($genes_new{$gsi}) {
      $gsi_del{$gsi} = 1;
      $genes{$gsi} = $g_old;
    }

    # transcripts
    foreach my $tr (@{ $g_old->get_all_Transcripts }) {

      # skip biotypes
      next if ($skip_biotypes{$tr->biotype});

      my $tsi = $tr->stable_id;

      unless ($tsi_new{$tsi}) {
        # mark transcript and translation as deleted
        $tsi_del{$tsi} = 1;
        my $tl = $tr->translation;
        $tlsi_del{$tl->stable_id} = 1 if ($tl);

        # mark gene as modified
        $gsi_mod{$gsi} = 1;
        $genes_mod{$gsi} = $g_old;
        $genes{$gsi} = $g_old;
      }
    }
  }

  # create stable ID strings for use in mysql IN statements
  $gsi_string = "'".join("', '", keys(%gsi_del))."'";
  $gsi_mod_string = "'".join("', '", keys(%gsi_mod))."'";
  $tsi_string = "'".join("', '", keys(%tsi_del))."'";
  $tlsi_string = "'".join("', '", keys(%tlsi_del))."'";

  # stats
  my $fmt = "%-15s%6d\n";
  $support->log("Deleted objects found:\n", 1);
  $support->log(sprintf($fmt, "genes", scalar(keys(%gsi_del))), 2);
  $support->log(sprintf($fmt, "transcripts", scalar(keys(%tsi_del))), 2);
  $support->log(sprintf($fmt, "translations", scalar(keys(%tlsi_del))), 2);

  $support->log("Modified genes found: ".scalar(keys(%gsi_mod))."\n", 1);

  $support->log_stamped("Done.\n\n");
}


=head2 create_mapping_session

  Example     : my $msi = &create_mapping_session;
  Description : Creates a new mapping_session in the db.
  Return type : Int - mapping_session_id of the newly created entry
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

433
sub create_mapping_session {

  # Inserts one row into mapping_session of the new db, describing the old and
  # new database names, schema versions and default assemblies, and returns
  # the id of that row. With dry_run set nothing is inserted and 0 is
  # returned.

  $support->log("Creating new mapping session...\n");

  my $old_db_name = $support->param('altdbname');
  my $new_db_name = $support->param('dbname');

  my $old_mca = $dba_old->get_MetaContainer;
  my ($old_release) = @{ $old_mca->list_value_by_key('schema_version') };
  my ($old_assembly) = @{ $old_mca->list_value_by_key('assembly.default') };

  my $new_mca = $dba_new->get_MetaContainer;
  my ($new_release) = @{ $new_mca->list_value_by_key('schema_version') };
  my ($new_assembly) = @{ $new_mca->list_value_by_key('assembly.default') };

  # Values are bound rather than interpolated so that quotes in any of them
  # cannot break the statement.
  my $sql = qq(
    INSERT INTO mapping_session
    VALUES (NULL, ?, ?, ?, ?, ?, ?, NOW())
  );

  # A missing meta value used to be interpolated as an empty string; keep
  # that instead of binding NULL.
  my @bind_values = map { defined($_) ? $_ : '' } (
    $old_db_name, $new_db_name,
    $old_release, $new_release,
    $old_assembly, $new_assembly,
  );

  # Only ask the driver for the insert id when a row was really inserted.
  my $mapping_session_id = 0;
  unless ($support->param('dry_run')) {
    $dbh_new->do($sql, undef, @bind_values);
    $mapping_session_id = $dbh_new->{'mysql_insertid'};
  }

  my $fmt = "%-23s%-40s\n";
  $support->log(sprintf($fmt, 'mapping_session_id', $mapping_session_id), 1);
  $support->log(sprintf($fmt, 'old_db_name', $old_db_name), 1);
  $support->log(sprintf($fmt, 'new_db_name', $new_db_name), 1);
  $support->log(sprintf($fmt, 'old_release', $old_release), 1);
  $support->log(sprintf($fmt, 'new_release', $new_release), 1);
  $support->log(sprintf($fmt, 'old_assembly', $old_assembly), 1);
  $support->log(sprintf($fmt, 'new_assembly', $new_assembly), 1);

  $support->log("Done.\n\n");

  return $mapping_session_id;
}


471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
=head2 create_stable_id_events 

  Example     : &create_stable_id_events
  Description : Creates stable_id_event entries for all objects found in the old
                db, mapping them to themselves. Optionally, some biotypes will
                be skipped (this is useful if a separate script is run to deal
                with ncRNAs).
  Return type : none
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

486 487 488 489 490 491 492 493 494 495 496 497
sub create_stable_id_events {

  # Walks over every gene of the old db and writes one stable_id_event row per
  # gene, transcript and translation into the new db, each row mapping the
  # object to itself for the current mapping session. Objects whose biotype is
  # in %skip_biotypes are counted but not written. With dry_run set nothing is
  # written, only counted.

  $support->log_stamped("Creating stable_id_event entries for all objects, mapping to themselves...\n");

  my $gene_adaptor = $dba_old->get_GeneAdaptor;
  my @old_genes = @{ $gene_adaptor->fetch_all };

  my $sql = qq(
    INSERT INTO stable_id_event 
    VALUES (?, ?, ?, ?, ?, ?, ?)
  );
  my $sth = $dbh_new->prepare($sql);

  # writes the self-mapping row for one object unless this is a dry run
  my $insert_self_mapping = sub {
    my ($object, $type) = @_;

    return if ($support->param('dry_run'));

    $sth->execute(
      $object->stable_id,
      $object->version,
      $object->stable_id,
      $object->version,
      $mapping_session_id,
      $type,
      1
    );
  };

  my ($genes_seen, $genes_done) = (0, 0);
  my ($transcripts_seen, $transcripts_done) = (0, 0);
  my $translations_done = 0;

  # the gene list is consumed with shift() so that it shrinks while we go
  while (my $gene = shift(@old_genes)) {

    $genes_seen++;
    next if ($skip_biotypes{$gene->biotype});

    $insert_self_mapping->($gene, 'gene');
    $genes_done++;

    # transcripts
    foreach my $transcript (@{ $gene->get_all_Transcripts }) {

      $transcripts_seen++;
      next if ($skip_biotypes{$transcript->biotype});

      $insert_self_mapping->($transcript, 'transcript');
      $transcripts_done++;

      # translations
      my $translation = $transcript->translation;
      next unless ($translation);

      $insert_self_mapping->($translation, 'translation');
      $translations_done++;
    }
  }

  $sth->finish;

  $support->log_stamped("Done inserting entries for $genes_done (of $genes_seen) genes, $transcripts_done (of $transcripts_seen) transcripts, $translations_done translations.\n\n");

}

573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637

=head2 increment_gene_versions 

  Example     : &increment_gene_versions;
  Description : Increment version of all genes where transcripts were deleted.
                Also raises gene_stable_id.version in the new db to the
                incremented version wherever it is still lower.
  Return type : none
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

sub increment_gene_versions {

  # Bumps the version of every gene that lost transcripts: first in the
  # stable_id_event rows of the current mapping session, then in
  # gene_stable_id (only where the stored version is still lower than the
  # bumped one). With dry_run set no statement is executed and both counts
  # are logged as 0.

  $support->log_stamped("Incrementing gene versions for genes where transcripts were deleted...\n");

  # update stable_id_event
  my $event_sql = qq(
    UPDATE stable_id_event
    SET new_version = new_version + 1
    WHERE new_stable_id IN ($gsi_mod_string)
    AND mapping_session_id = $mapping_session_id
  );
  my $event_rows = $support->param('dry_run') ? 0 : $dbh_new->do($event_sql);
  $support->log("stable_id_event [$event_rows]\n", 1);

  # update gene_stable_id
  my $gene_sql = qq(
    UPDATE gene_stable_id
    SET version = ?
    WHERE stable_id = ?
    AND version < ?
  );
  my $gene_sth = $dbh_new->prepare($gene_sql);

  my $gene_rows = 0;

  foreach my $gene (values(%genes_mod)) {
    my $bumped_version = $gene->version + 1;

    unless ($support->param('dry_run')) {
      $gene_rows += $gene_sth->execute($bumped_version, $gene->stable_id, $bumped_version);
    }
  }

  $support->log("gene_stable_id [$gene_rows]\n", 1);

  $support->log_stamped("Done.\n\n");
}


=head2 mark_deleted

  Example     : &mark_deleted;
  Description : Sets stable_id_event.new_stable_id to NULL for deleted objects.
  Return type : none
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
sub mark_deleted {

  # For the current mapping session, turns the self-mapping stable_id_event
  # rows of deleted genes, transcripts and translations into deletion events
  # by setting new_stable_id to NULL and new_version to 0. With dry_run set no
  # statement is executed and a count of 0 is logged.

  $support->log_stamped("Setting new_stable_id to NULL for deleted objects...\n");

  # log label and quoted stable ID list (ready for an SQL IN clause) per type
  my @deleted = (
    [ 'Genes',        $gsi_string ],
    [ 'Transcripts',  $tsi_string ],
    [ 'Translations', $tlsi_string ],
  );

  foreach my $entry (@deleted) {
    my ($label, $id_list) = @{ $entry };

    $support->log("$label... ", 1);

    my $sql = qq(
    UPDATE stable_id_event
    SET new_stable_id = NULL, new_version = 0
    WHERE new_stable_id IN ($id_list)
    AND mapping_session_id = $mapping_session_id
  );

    # declared unconditionally: "my ... unless" has undefined behaviour
    my $c = 0;
    $c = $dbh_new->do($sql) unless ($support->param('dry_run'));
    $support->log("[$c]\n");
  }

  $support->log_stamped("Done.\n\n");

}


677 678 679 680 681 682 683 684 685 686 687 688 689
=head2 populated_archive

  Example     : &populated_archive;
  Description : Populates gene_archive and peptide_archive for all deleted
                transcripts.
  Return type : none
  Exceptions  : none
  Caller      : main()
  Status      : At Risk
              : under development

=cut

690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
sub populated_archive {

  # For every deleted transcript of the genes collected in %genes, writes a
  # gene_archive row (and, if the transcript has a translation, a
  # peptide_archive row holding the peptide sequence and its MD5 checksum)
  # into the new db. With dry_run set nothing is written, but the entries are
  # still counted.

  $support->log_stamped("Populating gene_archive and peptide_archive...\n");

  my $sth_gene = $dbh_new->prepare(qq(
    INSERT INTO gene_archive
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  ));
  my $sth_pep = $dbh_new->prepare(qq(
    INSERT INTO peptide_archive (md5_checksum, peptide_seq)
    VALUES (?, ?)
  ));

  my $entries = 0;

  foreach my $gene (values(%genes)) {

    foreach my $transcript (@{ $gene->get_all_Transcripts }) {

      # %genes also holds genes that only lost some of their transcripts, so
      # transcripts that still exist have to be left alone
      next unless ($tsi_del{$transcript->stable_id});

      my $translation = $transcript->translation;

      if ($support->param('dry_run')) {
        $entries++;
        next;
      }

      # values used when the transcript has no translation
      my ($tl_stable_id, $tl_version, $peptide_id) = ("", 0, 0);

      # add peptide_archive entry
      if ($translation) {
        $tl_stable_id = $translation->stable_id;
        $tl_version = $translation->version;

        my $peptide = $transcript->translate->seq;
        $sth_pep->execute(md5_hex($peptide), $peptide);
        $peptide_id = $dbh_new->{'mysql_insertid'};
      }

      # add gene_archive entry
      $sth_gene->execute(
          $gene->stable_id,
          $gene->version,
          $transcript->stable_id,
          $transcript->version,
          $tl_stable_id,
          $tl_version,
          $peptide_id,
          $mapping_session_id
      );

      $entries++;
    }
  }

  $support->log_stamped("Done adding $entries entries.\n\n");
}
750