GramenePathwayParser.pm 2.34 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package XrefParser::GramenePathwayParser;

=pod

=head1 NAME

XrefParser::GramenePathwayParser

=head1 DESCRIPTION

Parse pathway dumps from Gramene.  The input is a tab-delimited file that is
read by field name; the fields, with example values, are:

 gene_name                  AT1G66030
 enzyme_name                fatty acid (omega-1)-hydroxylase
 reaction_id                RXN-7796
 reaction_name              
 ec                         2.7.7.-
 pathway_id                 PWY-5129
 pathway_name               sphingolipid biosynthesis (plants)

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=cut

use strict;
use Text::RecordParser::Tab;
use base 'XrefParser::BaseParser';

# Parse a Gramene pathway dump and register an xref for every EC number and
# pathway ID found, each linked directly to the record's gene.
#
# Arguments: ($self, $args), where $args is a hashref. Keys read here:
#   source_id  - passed through to add_xref
#   species_id - passed through to add_xref
#   files      - arrayref of file names; only the first element is parsed
# Other keys (e.g. 'rel_file', 'verbose') may be present but are not used.
#
# Returns 0 on success, or 1 when no input file was supplied.
#
# NOTE(review): add_xref and add_direct_xref are not defined in this package;
# presumably they are inherited from XrefParser::BaseParser -- confirm.
sub run {
    my ($self, $args) = @_;
    my $source_id     = $args->{'source_id'};
    my $species_id    = $args->{'species_id'};
    my $files         = $args->{'files'};

    # Look at the first file without shifting it off the caller's array.
    my $file = ref $files eq 'ARRAY' ? $files->[0] : '';

    if ( !$file ) {
        # Data::Dumper is not loaded at file level, so load it here and call
        # it fully qualified; otherwise this error path itself would die.
        require Data::Dumper;
        printf STDERR "%s called without a 'files' argument\n%s",
            __PACKAGE__, Data::Dumper::Dumper($args);
        return 1; # error
    }

    my $p = Text::RecordParser::Tab->new( $file );

    my $direct_xref_count = 0;
    while ( my $rec = $p->fetchrow_hashref ) {
        # A record without a gene name has nothing to attach xrefs to.
        my $gene = $rec->{'gene_name'} or next;

        # EC number: stored with an empty label and description.
        if ( my $ec = $rec->{'ec'} ) {
            my $ec_xref_id = $self->add_xref({
                source_id  => $source_id,
                species_id => $species_id,
                acc        => $ec,
                label      => '',
                desc       => '',
                info_type  => 'DIRECT',
            });

            $self->add_direct_xref( $ec_xref_id, $gene, 'Gene', 'DIRECT' );
            $direct_xref_count++;
        }

        # Pathway: the pathway name is used as the xref label.
        if ( my $pathway_id = $rec->{'pathway_id'} ) {
            my $pathway_xref_id = $self->add_xref({
                source_id  => $source_id,
                species_id => $species_id,
                acc        => $pathway_id,
                label      => $rec->{'pathway_name'},
                desc       => '',
                info_type  => 'DIRECT',
            });

            $self->add_direct_xref( $pathway_xref_id, $gene, 'Gene', 'DIRECT' );
            $direct_xref_count++;
        }
    }

    printf "Parsed pathway Ids from file '%s,' added %s direct_xrefs\n",
        $file, $direct_xref_count;

    return 0; # success
}

1;