Newer
Older
use strict;
use DBI;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use XrefMapper::db;
use vars '@ISA';
@ISA = qw{ XrefMapper::db };
=head1 NAME
XrefMapper::BasicMapper
=head1 DESCIPTION
This is the basic mapper routine. It will create the necessary fasta files for
both the xref and ensembl sequences. These will then be matched using exonerate
Ian Longden
committed
and the results written to another file. By creating a <species>.pm file and
inheriting from this base class different matching routines, parameters, data
sets etc can be set.
=head1 CONTACT
Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
=cut
=head2 dump_seqs
Arg[1]: xref object which holds info needed for the dump of xref
Description: Dumps out the files for the mapping. Xref object should hold
the value of the databases and source to be used.
Returntype : none
Exceptions : will die if species not known or an error occurs while
: trying to write to files.
Caller : general
=cut
my ($self, $xref) = @_;
$self->dump_xref($xref);
$self->dump_ensembl();
=head2 run_matching
Arg[1]: xref object which holds info on method and files.
Description: runs the mapping of the list of files with specied methods
Returntype : none
Exceptions : none
Caller : general
=cut
my ($self,$xref) = @_;
my @list=();
my $i = 0;
foreach my $method (@{$self->method()}){
my @dna=();
push @dna, $method;
push @dna, $xref->dir."/xref_".$i."_dna.fasta";
push @dna, $self->ensembl_dna_file();
push @list, \@dna;
my @pep=();
push @pep, $method;
push @pep, $xref->dir."/xref_".$i."_prot.fasta";
push @pep, $self->ensembl_protein_file();
push @list, \@pep;
$i++;
}
$self->run_mapping(\@list);
=head2 get_species_id_from_species_name
Arg[1]: species name
Description: get the species_id from the database for the named database.
Example : my $id = get_species_id_from_species_name('homo_sapiens');
Returntype : int (species_id)
Exceptions : will die if species does not exist in given xref database.
Caller : general
=cut
sub get_species_id_from_species_name{
my ($xref,$species) = @_;
my $sql = "select species_id from species where name = '".$species."'";
$sth->execute();
my @row = $sth->fetchrow_array();
my $species_id;
if (defined @row) {
$species_id = $row[0];
} else {
print STDERR "Couldn't get ID for species ".$species."\n";
print STDERR "It must be one of :-\n";
$sql = "select name from species";
$sth = $dbi->prepare($sql);
$sth->execute();
while(my @row = $sth->fetchrow_array()){
print STDERR $row[0]."\n";
}
die("Please try again :-)\n");
return $species_id;
}
=head2 get_set_lists
Description: specifies the list of databases and source to be used in the
: generation of one or more data sets.
Returntype : list of lists
Example : my @lists =@{$self->get_set_lists()};
Exceptions : none
Caller : dump_xref
=cut
sub get_set_lists{
my ($self) = @_;
return [["method1",["homo_sapiens","RefSeq"],["homo_sapiens","UniProtSwissProt"]],
["method2",[$self->species,"*"]],
["method3",["*","*"]]];
}
=head2 get_source_id_from_source_name
Arg[1]: source name
Description: get the source_id from the database for the named source.
Example : my $id = get_source_id_from_source_name('RefSeq');
Returntype : int (source_id)
Exceptions : will die if source does not exist in given xref database.
Caller : general
=cut
sub get_source_id_from_source_name{
my ($xref, $source) = @_;
my $source_id;
my $sql = "select source_id from source where name = '".$source."'";
my $dbi = $xref->dbi();
my $sth = $dbi->prepare($sql);
$sth->execute();
my @row = $sth->fetchrow_array();
if (defined $row[0] and $row[0] ne '') {
$source_id = $row[0];
# print $source."\t*".$row[0]."*\n";
} else {
print STDERR "Couldn't get ID for source ".$source."\n";
print STDERR "It must be one of :-\n";
$sql = "select name from source";
$sth = $dbi->prepare($sql);
$sth->execute();
while(my @row = $sth->fetchrow_array()){
print STDERR $row[0]."\n";
}
die("Please try again :-)\n");
}
return $source_id;
}
=head2 dump_xref
Arg[1]: xref object which holds info on method and files.
Description: Dumps the Xref data as fasta file(s)
Returntype : none
Exceptions : none
Caller : dump_seqs
=cut
sub dump_xref{
my ($self,$xref) = @_;
if(!defined($xref->dir())){
if(defined($self->dir)){
$xref->species($self->dir);
}
else{
$xref->dir(".");
}
}
my @method=();
my @lists =@{$self->get_set_lists()};
my $i=0;
foreach my $list (@lists){
print "method->".@$list[0]."\n";
$method[$i] = shift @$list;
my $j = 1;
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
my @species_id=();
foreach my $element (@$list){
while(my $species = shift(@$element)){
# print $j.")\t".$species."\n";
if($species ne "*"){
$species_id[$j] = get_species_id_from_species_name($xref,$species);
}
else{
$species_id[$j] = -1;
}
my $source = shift(@$element);
if($source ne "*"){
$source_id[$j] = get_source_id_from_source_name($xref,$source);
}
else{
$source_id[$j] = -1;
}
print $j."\t".$source. "\t".$source_id[$j] ."\n";
print $j."\t".$species."\t".$species_id[$j]."\n";
$j++;
}
}
#method data fully defined now
dump_subset($xref,\@species_id,\@source_id,$i);
$i++;
}
$self->method(\@method);
return;
}
=head2 dump_subset
Arg[1]: xref object which holds info on files.
Arg[2]: list of species to use.
Arg[3]: list of sources to use.
Arg[4]: index to be used in file creation.
Description: Dumps the Xref data for one set of species/databases
Returntype : none
Exceptions : none
Caller : dump_xref
=cut
sub dump_subset{
my ($xref,$rspecies_id,$rsource_id,$index) = @_;
open(XDNA,">".$xref->dir()."/xref_".$index."_dna.fasta")
|| die "Could not open xref_".$index."_dna.fasta";
my $sql = "select p.xref_id, p.sequence, x.species_id , x.source_id ";
$sql .= " from primary_xref p, xref x ";
$sql .= " where p.xref_id = x.xref_id and ";
$sql .= " p.sequence_type ='dna' ";
for (my $j =1; $j<scalar(@$rspecies_id); $j++){
print $j."\t".$$rspecies_id[$j]."\t".$$rsource_id[$j]."\n";
}
# return $xref->dir."/xref_".$i."_dna.fasta";
my $sth = $xref->dbi()->prepare($sql);
while(my @row = $sth->fetchrow_array()){
my $pass = 0;
for (my $j =1; $j<scalar(@$rspecies_id); $j++){
if($$rspecies_id[$j] < 0 or $row[2] == $$rspecies_id[$j]){
if($$rsource_id[$j] < 0 or $row[3] == $$rsource_id[$j]){
$pass = 1;
}
}
}
if($pass){
$i++;
$row[1] =~ s/(.{60})/$1\n/g;
print XDNA ">".$row[0]."\n".$row[1]."\n";
if($i > 10){
goto ENDDNA;
}
ENDDNA:
open(XPRO,">".$xref->dir."/xref_".$index."_prot.fasta")
|| die "Could not open xref_".$index."_prot.fasta";
my $sql = "select p.xref_id, p.sequence, x.species_id , x.source_id ";
$sql .= " from primary_xref p, xref x ";
$sql .= " where p.xref_id = x.xref_id and ";
$sql .= " p.sequence_type ='peptide' ";
while(my @row = $sth->fetchrow_array()){
my $pass = 0;
for (my $j =1; $j<scalar(@$rspecies_id); $j++){
if($$rspecies_id[$j] < 0 or $row[2] == $$rspecies_id[$j]){
if($$rsource_id[$j] < 0 or $row[3] == $$rsource_id[$j]){
$pass = 1;
}
}
}
if($pass){
$i++;
$row[1] =~ s/(.{60})/$1\n/g;
print XPRO ">".$row[0]."\n".$row[1]."\n";
if($i > 10){
goto ENDPRO;
}
ENDPRO:
close XPRO;
=head2 dump_ensembl
Description: Dumps the ensembl data to a file in fasta format.
Returntype : none
Exceptions : none
Caller : dump_seqs
=cut
sub dump_ensembl{
my ($self) = @_;
$self->fetch_and_dump_seq();
=head2 fetch_and_dump_seq
Description: Dumps the ensembl data to a file in fasta format.
Returntype : none
Exceptions : wil die if the are errors in db connection or file creation.
Caller : dump_ensembl
=cut
sub fetch_and_dump_seq{
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-species => $self->species(),
-dbname => $self->dbname(),
-host => $self->host(),
-port => $self->port(),
-password => $self->password(),
-group => 'core');
Ian Longden
committed
# if no directory set then dump in the current directory.
if(!defined($self->dir())){
$self->dir(".");
}
$self->ensembl_dna_file($self->dir."/".$self->species."_dna.fasta");
|| die("Could not open dna file for writing: ".$self->ensembl_dna_file."\n");
#
# store ensembl protein file name and open it
#
$self->ensembl_protein_file($self->dir."/".$self->species."_protein.fasta");
open(PEP,">".$self->ensembl_protein_file())
|| die("Could not open dna file for writing: ".$self->ensembl_protein_file."\n");
my $gene_adap = $db->get_GeneAdaptor();
my @gene_ids = @{$gene_adap->list_dbIDs()};
my $i =0;
foreach my $gene_id (@gene_ids){
$i++;
my $gene = $gene_adap->fetch_by_dbID($gene_id);
# print "gene ".$gene."\n";
foreach my $transcript (@{$gene->get_all_Transcripts()}) {
my $seq = $transcript->spliced_seq();
$seq =~ s/(.{60})/$1\n/g;
print DNA ">" . $transcript->dbID() . "\n" .$seq."\n";
my $trans = $transcript->translation();
my $translation = $transcript->translate();
# print "tranlation ".$translation."\n";
my $pep_seq = $translation->seq();
$pep_seq =~ s/(.{60})/$1\n/g;
print PEP ">".$trans->dbID()."\n".$pep_seq."\n";
}
if($i > 10){
goto FIN;
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
#=head2 xref_protein_file
#
# Arg [1] : (optional) string $arg
# the fasta file name for the protein xref
# Example : $file name = $xref->xref_protein_file();
# Description: Getter / Setter for the protien xref fasta file
# Returntype : string
# Exceptions : none
#
#=cut
#
#
#sub xref_protein_file{
# my ($self, $arg) = @_;
#
# (defined $arg) &&
# ($self->{_xref_prot_file} = $arg );
# return $self->{_xref_prot_file};
#}
#
#=head2 xref_dna_file
#
# Arg [1] : (optional) string $arg
# the fasta file name for the dna xref
# Example : $file name = $xref->xref_dna_file();
# Description: Getter / Setter for the dna xref fasta file
# Returntype : string
# Exceptions : none
#
#=cut
#
#sub xref_dna_file{
# my ($self, $arg) = @_;
#
# (defined $arg) &&
# ($self->{_xref_dna_file} = $arg );
# return $self->{_xref_dna_file};
#}
=head2 ensembl_protein_file
Arg [1] : (optional) string $arg
the fasta file name for the ensembl proteins
Example : $file_name = $self->ensembl_protein_file();
Description: Getter / Setter for the protien ensembl fasta file
Returntype : string
Exceptions : none
=cut
(defined $arg) &&
($self->{_ens_prot_file} = $arg );
return $self->{_ens_prot_file};
}
=head2 ensembl_dna_file
Arg [1] : (optional) string $arg
the fasta file name for the ensembl dna
Example : $file_name = $self->ensembl_dna_file();
Description: Getter / Setter for the protien ensembl fasta file
Returntype : string
Exceptions : none
=cut
(defined $arg) &&
($self->{_ens_dna_file} = $arg );
return $self->{_ens_dna_file};
}
=head2 method
Arg [1] : (optional) list reference $arg
reference to a list of method names
Example : my @methods = @{$self->method()};
Description: Getter / Setter for the methods
Returntype : list
Exceptions : none
=cut
sub method{
my ($self, $arg) = @_;
(defined $arg) &&
($self->{_method} = $arg );
return $self->{_method};
}
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
# --------------------------------------------------------------------------------
# GLENN
# Use exonerate (or other program) to find xref-ensembl obejct mappings
sub run_mapping {
my ($self, $lists) = @_;
# foreach method, submit the appropriate job & keep track of the job name
my @job_names;
foreach my $list (@$lists){
my ($method, $queryfile ,$targetfile) = @$list;
print "###$method $queryfile $targetfile\n";
my $obj_name = "XrefMapper::Methods::$method";
# check that the appropriate object exists
eval "require $obj_name";
if($@) {
warn("Could not find object $obj_name corresponding to mapping method $method, skipping\n$@");
} else {
my $obj = $obj_name->new();
my $job_name = $obj->run($queryfile, $targetfile);
push @job_names, $job_name;
print "Submitted LSF job $job_name to list\n";
sleep 1; # make sure unique names really are unique
}
} # foreach method
# submit depend job to wait for all mapping jobs
submit_depend_job($self->dir, @job_names);
} # run_exonerate
sub submit_depend_job {
my ($root_dir, @job_names) = @_;
# Submit a job that does nothing but wait on the main jobs to
# finish. This job is submitted interactively so the exec does not
# return until everything is finished.
# build up the bsub command; first part
my @depend_bsub = ('bsub', '-K');
# one -wended clause for each main job
foreach my $job (@job_names) {
push @depend_bsub, "-wended($job)";
}
# rest of command
push @depend_bsub, ('-q', 'small', '-o', "$root_dir/depend.out", '-e', "$root_dir/depend.err", '/bin/true');
#print "depend bsub:\n" . join (" ", @depend_bsub) . "\n";
my ($depend_wtr, $depend_rtr, $depend_etr, $depend_pid);
$depend_pid = open3($depend_wtr, $depend_rtr, $depend_etr, @depend_bsub);
my $depend_jobid;
while (<$depend_rtr>) {
if (/Job <([0-9]+)> is/) {
$depend_jobid = $1;
print "LSF job ID for depend job: $depend_jobid \n" ;
}
}
if (!defined($depend_jobid)) {
print STDERR "Error: could not get depend job ID\n";
}
}
=head2 store
Arg[1] : The target file used in the exonerate run. Used to work out the Ensembl object type.
Arg[2] :
Example : none
Description: Parse exonerate output files and build files for loading into target db tables.
Returntype : List of strings
Exceptions : none
Caller : general
=cut
sub store {
my ($target_file_name) = @_;
my $type = get_ensembl_object_type($target_file_name);
# get or create the appropriate analysis ID
my $analysis_id = get_analysis_id($type);
# TODO - get this from config
my $dbi = DBI->connect("dbi:mysql:host=ecs1g;port=3306;database=arne_core_20_34",
"ensadmin",
"ensembl",
{'RaiseError' => 1}) || die "Can't connect to database";
# get current max object_xref_id
my $max_object_xref_id = 0;
my $sth = $dbi->prepare("SELECT MAX(object_xref_id) FROM object_xref");
$sth->execute();
my $max_object_xref_id = ($sth->fetchrow_array())[0];
if (!defined $max_object_xref_id) {
print "Can't get highest existing object_xref_id, using 1\n)";
} else {
print "Maximum existing object_xref_id = $max_object_xref_id\n";
}
#my $ox_sth = $dbi->prepare("INSERT INTO object_xref(ensembl_id, ensembl_object_type, xref_id) VALUES(?,?,?)");
#my $ix_sth = $dbi->prepare("INSERT INTO identity_xref VALUES(?,?,?,?,?,?,?,?,?,?,?)");
# files to write table data to
open (OBJECT_XREF, ">object_xref.txt");
open (IDENTITY_XREF, ">identity_xref.txt");
my $total_lines = 0;
my $total_files = 0;
my $object_xref_id = $max_object_xref_id + 1;
# keep a (unique) list of xref IDs that need to be written out to file as well
my %primary_xref_ids;
foreach my $file (glob("*.map")) {
print "Parsing results from $file \n";
open(FILE, $file);
$total_files++;
while (<FILE>) {
$total_lines++;
chomp();
my ($label, $query_id, $target_id, $query_start, $query_end, $target_start, $target_end, $cigar_line, $score) = split(/:/, $_);
$cigar_line =~ s/ //;
# TODO make sure query & target are the right way around
print OBJECT_XREF "$object_xref_id\t$target_id\t$type\t$query_id\n";
print IDENTITY_XREF "$object_xref_id\t$query_id\t$target_id\t$query_start\t$query_end\t$target_start\t$target_end\t$cigar_line\t$score\t\\N\t$analysis_id\n";
# TODO - evalue?
$object_xref_id++;
$primary_xref_ids{$query_id} = $query_id;
# Store in database
# create entry in object_xref and get its object_xref_id
#$ox_sth->execute($target_id, $type, $query_id) || warn "Error writing to object_xref table";
#my $object_xref_id = $ox_sth->{'mysql_insertid'};
# create entry in identity_xref
#$ix_sth->execute($object_xref_id, $query_id, $target_id, $query_start, $query_end, $target_start, $target_end, $cigar_line, $score, undef, $analysis_id) || warn "Error writing to identity_xref table";
}
close(FILE);
}
close(IDENTITY_XREF);
close(OBJECT_XREF);
print "Read $total_lines lines from $total_files exonerate output files\n";
# write relevant xrefs to file
dump_xrefs(\%primary_xref_ids);
}
sub get_ensembl_object_type {
my $filename = shift;
my $type;
if ($filename =~ /gene/i) {
$type = "Gene";
} elsif ($filename =~ /transcript/i) {
$type = "Transcript";
} elsif ($filename =~ /translation/i) {
$type = "Translation";
} else {
print STDERR "Cannot deduce Ensembl object type from filename $filename";
}
return $type;
}
sub get_analysis_id {
my $ensembl_type = shift;
my %typeToLogicName = ( 'transcript' => 'XrefExonerateDNA',
'translation' => 'XrefExonerateProtein' );
my $logic_name = $typeToLogicName{lc($ensembl_type)};
# TODO - get these details from Config
my $host = "ecs1g";
my $port = 3306;
my $database = "arne_core_20_34";
my $user = "ensadmin";
my $password = "ensembl";
my $dbi = DBI->connect("dbi:mysql:host=$host;port=$port;database=$database",
"$user",
"$password",
{'RaiseError' => 1}) || die "Can't connect to database";
my $sth = $dbi->prepare("SELECT analysis_id FROM analysis WHERE logic_name='" . $logic_name ."'");
$sth->execute();
my $analysis_id;
if (my @row = $sth->fetchrow_array()) {
$analysis_id = $row[0];
print "Found exising analysis ID ($analysis_id) for $logic_name\n";
} else {
print "No analysis with logic_name $logic_name found, creating ...\n";
$sth = $dbi->prepare("INSERT INTO analysis (logic_name, created) VALUES ('" . $logic_name. "', NOW())");
# TODO - other fields in analysis table
$sth->execute();
$analysis_id = $sth->{'mysql_insertid'};
print "Done (analysis ID=" . $analysis_id. ")\n";
}
return $analysis_id;
}
sub dump_xrefs {
my $xref_ids_hashref = shift;
my @xref_ids = keys %$xref_ids_hashref;
open (XREF, ">xref.txt");
# TODO - get this from config
my $xref_dbi = DBI->connect("dbi:mysql:host=ecs1g;port=3306;database=glenn_test_xref",
"ensro",
"",
{'RaiseError' => 1}) || die "Can't connect to database";
my $core_dbi = DBI->connect("dbi:mysql:host=ecs1g;port=3306;database=arne_core_20_34",
"ensro",
"",
{'RaiseError' => 1}) || die "Can't connect to database";
# get current highest internal ID from xref
my $max_xref_id = 0;
my $core_sth = $core_dbi->prepare("SELECT MAX(xref_id) FROM xref");
$core_sth->execute();
my $max_xref_id = ($core_sth->fetchrow_array())[0];
if (!defined $max_xref_id) {
print "Can't get highest existing xref_id, using 0\n)";
} else {
print "Maximum existing xref_id = $max_xref_id\n";
}
my $core_xref_id = $max_xref_id + 1;
# keep a unique list of source IDs to build the external_db table later
my %source_ids;
# execute several queries with a max of 200 entries in each IN clause - more efficient
my $batch_size = 200;
while(@xref_ids) {
my @ids;
if($#xref_ids > $batch_size) {
@ids = splice(@xref_ids, 0, $batch_size);
} else {
@ids = splice(@xref_ids, 0);
}
my $id_str;
if(@ids > 1) {
$id_str = "IN (" . join(',', @ids). ")";
} else {
$id_str = "= " . $ids[0];
}
my $sql = "SELECT * FROM xref WHERE xref_id $id_str";
my $xref_sth = $xref_dbi->prepare($sql);
$xref_sth->execute();
my ($xref_id, $accession, $label, $description, $source_id, $species_id);
$xref_sth->bind_columns(\$xref_id, \$accession, \$label, \$description, \$source_id, \$species_id);
# note the xref_id we write to the file is NOT the one we've just read
# from the internal xref database as the ID may already exist in the core database
while (my @row = $xref_sth->fetchrow_array()) {
print XREF "$core_xref_id\t$accession\t$label\t$description\n";
$source_ids{$source_id} = $source_id;
$core_xref_id++;
if ($source_id == 1001) {
print "xref $xref_id has source_id 1001\n";
}
}
# Now get the dependent xrefs for each of these xrefs and write them as well
$sql = "SELECT x.accession, x.label, x.description, x.source_id FROM dependent_xref dx, xref x WHERE x.xref_id=dx.master_xref_id AND master_xref_id $id_str";
my $dep_sth = $xref_dbi->prepare($sql);
$dep_sth->execute();
$dep_sth->bind_columns(\$accession, \$label, \$description, \$source_id);
while (my @row = $dep_sth->fetchrow_array()) {
print XREF "$core_xref_id\t$accession\t$label\t$description\tDEPENDENT\n";
$source_ids{$source_id} = $source_id;
$core_xref_id++;
}
#print "source_ids: " . join(" ", keys(%source_ids)) . "\n";
} # while @xref_ids
close(XREF);
# now write the exernal_db file - the %source_ids hash will contain the IDs of the
# sources that need to be written as external_dbs
open(EXTERNAL_DB, ">external_db.txt");
# get current highest internal ID from external_db
my $max_edb_id = 0;
my $core_sth = $core_dbi->prepare("SELECT MAX(external_db_id) FROM external_db");
$core_sth->execute();
my $max_edb_id = ($core_sth->fetchrow_array())[0];
if (!defined $max_edb_id) {
print "Can't get highest existing external_db_id, using 0\n)";
} else {
print "Maximum existing external_db_id = $max_edb_id\n";
}
my $edb_id = $max_edb_id + 1;
my @source_id_array = keys %source_ids;
my $source_id_str;
if(@source_id_array > 1) {
$source_id_str = "IN (" . join(',', @source_id_array). ")";
} else {
$source_id_str = "= " . $source_id_array[0];
}
my $source_sql = "SELECT name, release FROM source WHERE source_id $source_id_str";
my $source_sth = $xref_dbi->prepare($source_sql);
$source_sth->execute();
my ($source_name, $release);
$source_sth->bind_columns(\$source_name, \$release);
while (my @row = $source_sth->fetchrow_array()) {
print EXTERNAL_DB "$edb_id\t$source_name\t$release\tXREF\n";
# TODO knownxref etc??
$edb_id++;
}
close(EXTERNAL_DB);
}