-
Ian Longden authored
Remove getting source_id or species_id from filename if they are not set. As these have to be set or it dies before that bit is reached.
4d523bea
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
PomBaseParser.pm 2.99 KiB
package XrefParser::PomBaseParser;
use strict;
use warnings;
use Carp;
use POSIX qw(strftime);
use File::Basename;
use base qw( XrefParser::BaseParser );
# --------------------------------------------------------------------------------
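# Entry point for the parser: takes a hash reference of arguments
# (source_id, species_id, files, verbose), croaks if any of the first three
# is missing, and reads the first file in 'files' line by line.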
sub run {
my ($self, $ref_arg) = @_;
my $source_id = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id};
my $files = $ref_arg->{files};
my $verbose = $ref_arg->{verbose};
if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
croak "Need to pass source_id, species_id and files as pairs";
}
$verbose |=0;
my $file = @{$files}[0];
my $gene_source_id = XrefParser::BaseParser->get_source_id_for_source_name("PomBase_GENE");
my $transcript_source_id = XrefParser::BaseParser->get_source_id_for_source_name("PomBase_TRANSCRIPT");
my $pombase_io = $self->get_filehandle($file);
if ( !defined $pombase_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 is an error
}
my $xref_count =0;
my $syn_count =0;
while ( $_ = $pombase_io->getline() ) {
chomp;
if ($_ =~ /^([^\t]+)\t([^\t]+)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$/) {
my @line = split(m/\t/ms, $_);
my ($pombase_id, $name, $info_type, $biotype, $external_db_source, $desc, $ensembl_object_type, $synonyms) = undef;
$pombase_id = $line[0];
$name = $line[1];
$info_type = $line[2];
$biotype = $line[3];
$external_db_source = $line[4];
$desc = $line[5];
$ensembl_object_type = $line[6];
if (scalar @line == 8) {
$synonyms = $line[7];
}
# parse the lines corresponding to the gene entries
# and filter out lines corresponding to the CDS for example
#print "$ensembl_object_type\n";
if ($ensembl_object_type eq 'Gene') {
my $ensembl_xref_id = $self->add_xref($pombase_id,"",$name,$desc,$gene_source_id,$species_id,$info_type);
$self->add_direct_xref($ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type);
} elsif ($ensembl_object_type eq 'Transcript') {
my $ensembl_xref_id = $self->add_xref($pombase_id,"",$name,$desc,$transcript_source_id,$species_id,$info_type);