Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
package XrefParser::RFAMParser;
use strict;
use warnings;
use Carp;
use DBI;
use base qw( XrefParser::BaseParser );
use Bio::EnsEMBL::Registry;
sub run {
my ($self, $ref_arg) = @_;
my $source_id = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id};
my $files = $ref_arg->{files};
my $verbose = $ref_arg->{verbose};
if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
croak "Need to pass source_id, species_id and file as pairs";
}
$verbose |=0;
my $file = @{$files}[0];
#get direct RFAM xrefs from core
my $registry = "Bio::EnsEMBL::Registry";
$registry->load_registry_from_multiple_dbs(
{
'-host' => 'ens-staging1',
'-user' => 'ensro',
},
{
'-host' => 'ens-staging2',
'-user' => 'ensro',
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
);
#get the species name
my %id2name = $self->species_id2name;
my $species_name = $id2name{$species_id}[0];
my $dba = $registry->get_DBAdaptor($species_name, 'core');
my $rfam_sql = "select distinct t.stable_id, hit_name from analysis a join transcript t on (a.analysis_id = t.analysis_id and a.logic_name = 'ncRNA' and t.biotype != 'miRNA') join exon_transcript et on (t.transcript_id = et.transcript_id) join supporting_feature sf on (et.exon_id = sf.exon_id and sf.feature_type = 'dna_align_feature' ) join dna_align_feature df on (sf.feature_id = df.dna_align_feature_id) order by hit_name";
my $sth = $dba->dbc->prepare($rfam_sql);
$sth->execute();
#hash keyed on RFAM accessions, value is an array of ensembl transcript stable_ids
my %rfam_transcript_stable_ids;
while (my ($stable_id, $hit_name) = $sth->fetchrow_array ) {
my $rfam_id;
if ($hit_name =~ /^(RF\d+)/) {
$rfam_id = $1;
}
if ($rfam_id) {
push @{$rfam_transcript_stable_ids{$rfam_id}}, $stable_id;
}
}
$sth->finish;
my $file_io = $self->get_filehandle($file);
if ( !defined $file_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 is an error
}
my @xrefs;
local $/ = "//\n";
my $xref_count;
my $direct_count;
while ($_ = $file_io->getline()) {
my $xref;
my $entry = $_;
chomp $entry;
next if (!$entry);
my ($accession) = $entry =~ /\n#=GF\sAC\s+(\w+)/;
my ($label) = $entry =~ /\n#=GF\sID\s+([^\n]+)/;
my ($description) = $entry =~ /\n#=GF\sDE\s+([^\n]+)/;
if (exists($rfam_transcript_stable_ids{$accession})){
#add xref
my $xref_id = $self->add_xref({ acc => $accession,
version => 0,
label => $label || $accession ,
desc => $description,
source_id => $source_id,
species_id => $species_id,
info_type => "DIRECT"} );
my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}};
foreach my $stable_id (@transcript_stable_ids){
$self->add_direct_xref($xref_id, $stable_id, "Transcript", "");
$direct_count++;
}
$xref_count++;
}
}
$file_io->close();
print "Added $xref_count RFAM xrefs and $direct_count direct xrefs\n" if($verbose);
if ( !$xref_count ) {
return 1; # 1 error
}
return 0; # successfull
}
1;