Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
PomBaseParser.pm 2.99 KiB
package XrefParser::PomBaseParser;

use strict;
use warnings;
use Carp;
use POSIX qw(strftime);
use File::Basename;

use base qw( XrefParser::BaseParser );

# --------------------------------------------------------------------------------
# Parse command line and run if being run directly

sub run {

  my ($self, $ref_arg) = @_;
  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $files        = $ref_arg->{files};
  my $verbose      = $ref_arg->{verbose};

  if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
    croak "Need to pass source_id, species_id and files as pairs";
  }
  $verbose |=0;

  my $file = @{$files}[0];

  my $gene_source_id = XrefParser::BaseParser->get_source_id_for_source_name("PomBase_GENE");
  my $transcript_source_id = XrefParser::BaseParser->get_source_id_for_source_name("PomBase_TRANSCRIPT");

  my $pombase_io = $self->get_filehandle($file);

  if ( !defined $pombase_io ) {
    print STDERR "ERROR: Could not open $file\n";
    return 1;    # 1 is an error
  }

  my $xref_count =0;
  my $syn_count =0;

  while ( $_ = $pombase_io->getline() ) {

    chomp;

    if ($_ =~ /^([^\t]+)\t([^\t]+)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$/) {
	    
	    my @line = split(m/\t/ms, $_);
	    my ($pombase_id, $name, $info_type, $biotype, $external_db_source, $desc, $ensembl_object_type, $synonyms) = undef;
	    
	    $pombase_id          = $line[0];
	    $name                = $line[1];
	    $info_type           = $line[2];
	    $biotype             = $line[3];
            $external_db_source  = $line[4];
	    $desc                = $line[5];
	    $ensembl_object_type = $line[6];
	    
	    if (scalar @line == 8) {
	        $synonyms = $line[7];
	    }
	    # parse the lines corresponding to the gene entries
	    # and filter out lines corresponding to the CDS for example
	   
            #print "$ensembl_object_type\n"; 
	    if ($ensembl_object_type eq 'Gene') {
	        my $ensembl_xref_id = $self->add_xref($pombase_id,"",$name,$desc,$gene_source_id,$species_id,$info_type);
	        $self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type);
	    } elsif ($ensembl_object_type eq 'Transcript') {
	        my $ensembl_xref_id = $self->add_xref($pombase_id,"",$name,$desc,$transcript_source_id,$species_id,$info_type);