Skip to content
Snippets Groups Projects
CoreXrefParser.pm 4.17 KiB
Newer Older
package XrefParser::CoreXrefParser;

use strict;
use warnings;
use Carp;
use DBI;

use base qw( XrefParser::BaseParser );
use Bio::EnsEMBL::Registry;

# Copy the xrefs of one external database from an existing core database
# into the xref database and link each of them directly to the core object
# (gene, transcript or translation) it is attached to.
#
# Arguments: a single hash reference with the keys
#   source_id  - xref source id; the matching source name (looked up with
#                get_source_name_for_source_id) selects the DBEntries to copy
#   species_id - species id; translated to a registry species name through
#                species_id2name
#   file       - option string made of "key=>value," pairs. Every value has
#                to be followed by a comma to be recognised. Known keys:
#                  project      (required) 'ensembl' or 'ensemblgenomes',
#                               selects the staging servers to load
#                  object_type  (optional, default 'gene') gene, transcript
#                               or translation, capitalised or not
#                  biotype      (optional) restricts the fetch to that
#                               biotype; genes and transcripts only
#                  copy_description_from_object
#                               (optional) when true, an xref without a
#                               description gets the object's description
#                               with the trailing "[Source: ...]" part removed
#   verbose    - when true, print a one line summary of what was added
#
# Returns 0 when at least one xref was added, 1 otherwise.
# Dies (or croaks) on missing arguments, an unsupported project or object
# type, a biotype combined with the translation object type, an unknown
# species id, or when the registry has no adaptor for the request.
sub run_script {

  my ($self, $ref_arg) = @_;
  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $file         = $ref_arg->{file};
  my $verbose      = $ref_arg->{verbose};

  if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
    croak "Need to pass source_id, species_id and file as pairs";
  }
  $verbose |=0;

  # Pull the parser options out of the "file" string. The terminating comma
  # is part of the pattern, so an option without one is silently ignored.
  my %option;
  foreach my $name (qw(biotype object_type project copy_description_from_object)) {
    if ($file =~ /$name[=][>](\S+?)[,]/) {
      $option{$name} = $1;
    }
  }

  my $biotype                      = $option{biotype};
  my $object_type                  = $option{object_type};
  my $project                      = $option{project};
  my $copy_description_from_object = $option{copy_description_from_object};

  my $external_db_name = $self->get_source_name_for_source_id($source_id);

  #copy object xrefs from core

  my $registry = "Bio::EnsEMBL::Registry";

  # Staging servers to load into the registry, per supported project.
  my %staging_servers = (
    ensembl => [
      {
        '-host'    => 'ens-staging1',
        '-user'    => 'ensro',
      },
      {
        '-host'     => 'ens-staging2',
        '-user'     => 'ensro',
      },
    ],
    ensemblgenomes => [
      {
        '-host'     => 'mysql-eg-staging-1.ebi.ac.uk',
        '-port'     => 4160,
        '-user'     => 'ensro',
      },
      {
        '-host'     => 'mysql-eg-staging-2.ebi.ac.uk',
        '-port'     => 4275,
        '-user'     => 'ensro',
      },
    ],
  );

  # Test definedness first so a missing project reaches the intended error
  # instead of an "uninitialized value" warning.
  if (!defined $project || !exists $staging_servers{$project}) {
    die("Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
  }

  $registry->load_registry_from_multiple_dbs( @{ $staging_servers{$project} } );

  #get the species name
  my %id2name = $self->species_id2name;
  my $species_name;
  if (exists $id2name{$species_id}) {
    $species_name = $id2name{$species_id}[0];
  }
  if (!defined $species_name) {
    croak "No species name found for species_id $species_id";
  }

  if (!$object_type) {
    $object_type = 'gene';
  }

  # Accepted spellings of the object type, mapped to the canonical name
  # that is stored with the direct xref.
  my %valid_object_types = (
    gene        => 'Gene',
    transcript  => 'Transcript',
    translation => 'Translation',
    Gene        => 'Gene',
    Transcript  => 'Transcript',
    Translation => 'Translation',
  );

  if (!exists($valid_object_types{$object_type}) ) {
    die("Unsupported object type value. Supported values: ", join(',', sort keys %valid_object_types) );
  }

  my $canonical_type = $valid_object_types{$object_type};

  # Compare the canonical name so that 'Gene' and 'Transcript' are treated
  # exactly like their lower case spellings.
  if ($biotype && $canonical_type ne 'Gene' && $canonical_type ne 'Transcript') {
    die("Incorrect parser argument values: expecting gene or transcript object type when biotype provided.\n");
  }

  my $object_adaptor = $registry->get_adaptor($species_name, 'core', $object_type);
  if (!defined $object_adaptor) {
    croak "Could not get a core $object_type adaptor for species $species_name";
  }

  my @objects;

  if ($biotype) {
    @objects = @{$object_adaptor->fetch_all_by_biotype($biotype)};
  } else {
    @objects = @{$object_adaptor->fetch_all()};
  }

  my %added_xref;          # primary accession => xref id returned by add_xref
  my $direct_count = 0;    # number of object -> xref links created

  foreach my $object (@objects) {

    foreach my $xref (@{$object->get_all_DBEntries($external_db_name)}) {

      my $accession = $xref->primary_id();

      # An accession shared by several objects is stored only once.
      if (!exists($added_xref{$accession})) {

        my $description = $xref->description();

        # can() guards object classes that may not provide description()
        # (Translation is assumed to be one of them).
        if ($copy_description_from_object && !$description
            && $object->can('description') && $object->description()) {

          #populate xref description with object description stripping the [Source: .. part
          my ($stripped) = $object->description() =~ /([^\[]+)/;

          # The match fails when the description starts with '['.
          if (defined $stripped) {
            #trim trailing spaces
            $stripped =~ s/\s+$//;
            $description = $stripped;
          }
        }

        $added_xref{$accession} = $self->add_xref({ acc        => $accession,
                                                    version    => $xref->version(),
                                                    label      => $xref->display_id(),
                                                    desc       => $description,
                                                    source_id  => $source_id,
                                                    species_id => $species_id,
                                                    info_type  => "DIRECT"} );
      }

      $self->add_direct_xref($added_xref{$accession}, $object->stable_id(), $canonical_type, "");
      $direct_count++;
    }
  }

  my $xref_count = scalar(keys %added_xref);

  print "Added $xref_count $external_db_name xrefs and $direct_count $object_type direct xrefs\n" if($verbose);
  if ( !$xref_count ) {
    return 1;    # 1 error
  }

  return 0; # successful

}

1;