BasicMapper.pm

  } # foreach gene

  close(GENE_DESCRIPTIONS);

}

# remove a list of patterns from a string
sub filter_by_regexp {

  my ($str, $regexps) = @_;

  foreach my $regexp (@$regexps) {
    $str =~ s/$regexp//ig;
  }

  return $str;

}

# Regexp used for filter out useless text from gene descriptions
# Method can be overridden in species-specific modules
sub gene_description_filter_regexps {

  return ();

}


# The "consortium" source for this species, should be the same as in
# source table

sub consortium {

  return "xxx"; # Default to something that won't be matched as a source

}

# Sort a list of xrefs by the priority of their sources
# Assumed this function is called by Perl sort, passed with parameter
# See comment for build_gene_descriptions for how precedence is decided.

sub compare_xref_descriptions {

  my ($consortium, $gene_id, $xref_to_object) = @_;

  my @sources = ("Uniprot/SPTREMBL", "RefSeq_dna", "RefSeq_peptide", "Uniprot/SWISSPROT", $consortium);
  my @words = qw(unknown hypothetical putative novel probable [0-9]{3} kDa fragment cdna protein);

  my $src_a = $xref_to_source{$a};
  my $src_b = $xref_to_source{$b};
  my $pos_a = find_in_list($src_a, @sources);
  my $pos_b = find_in_list($src_b, @sources);

  # If same source, need to do more work
  if ($pos_a == $pos_b) {

   if ($src_a eq "Uniprot/SWISSPROT" || $src_a =~ /RefSeq/) {

     # Compare on query identities, then target identities if queries are the same
     my $key_a = $xref_to_object->{$a}; # e.g. "Translation|1234"
     my $key_b = $xref_to_object->{$b};
     my ($type_a, $object_a) = split(/\|/, $key_a);
     my ($type_b, $object_b) = split(/\|/, $key_b);

     return 0 if ($type_a != $type_b); # only compare like with like

     my $query_identity_a = $object_xref_identities{$key_a}->{$a}->{"query_identity"};
     my $query_identity_b = $object_xref_identities{$key_b}->{$b}->{"query_identity"};

     #print "gene 78163 " . $xref_accessions{$a} . " key a $key_a qia $query_identity_a " . $xref_accessions{$b} . " key b $key_b qib $query_identity_b \n" if ($gene_id==78163);
     return ($query_identity_a <=> $query_identity_b) if ($query_identity_a != $query_identity_b);

     my $target_identity_a = $object_xref_identities{$key_a}->{$a}->{"target_identity"};
     my $target_identity_b = $object_xref_identities{$key_b}->{$b}->{"target_identity"};

     return ($target_identity_a <=> $target_identity_b);

   } elsif ($src_a eq "Uniprot/SPTREMBL") {

     # Compare on words
     my $wrd_idx_a = find_match($xref_descriptions{$a}, @words);
     my $wrd_idx_b = find_match($xref_descriptions{$b}, @words);
     return $wrd_idx_a <=> $wrd_idx_b;

   } else {

     return 0;

   }
    return 0;

  } else {

    return $pos_a <=> $pos_b;

  }
}

# load external_db (if it's empty) from ../external_db/external_dbs.txt

sub upload_external_db {
  my ($self) = @_;

  my $core_db = $self->core->dbc;
  $core_db->connect();
  my $row = @{$core_db->db_handle->selectall_arrayref("SELECT COUNT(*) FROM external_db")}[0];
  my $count = @{$row}[0];

  if ($count == 0) {
    my $edb = cwd() . "/../external_db/external_dbs.txt";
    print "external_db table is empty, uploading from $edb\n";
    my $edb_sth = $core_db->prepare("LOAD DATA INFILE \'$edb\' INTO TABLE external_db");
    $edb_sth->execute();
  } else {
    print "external_db table already has $count rows, will not change it\n";
   }

}

1;