Commit 7e845316 authored by Monika Komorowska
Browse files

look up stable_ids in core and create direct xrefs

parent 89053cc6
......@@ -13,51 +13,133 @@ use strict;
use warnings;
use Carp;
use base qw( XrefParser::BaseParser );
use Bio::EnsEMBL::Registry;
# Return the fixed meta key string for this parser. create_xrefs() passes the
# returned key to get_meta_value() to decide whether xrefs should be created.
sub meta_key {
    my $self = shift;
    return 'array_express.exported';
}
# Download the ArrayExpress species listing, decide whether the current
# species is covered and, if it is, create one DIRECT gene xref per gene
# stable id found in the core database of the configured project.
#
# Arguments (single hash reference):
#   source_id  - xref source id to attach the new xrefs to (required)
#   species_id - xref species id (required)
#   file       - option string (required); it is scanned for
#                "project=>NAME," and "wget=>URL," (note the trailing comma)
#   verbose    - when true, print progress information
#
# Returns:
#   0     on success (at least one xref was added)
#   1     when the download fails, no URL was supplied, or no xref was added
#   undef (empty list) when _is_active() reports the species as not active
#
# Dies when a required argument is missing, when the project is missing or
# unsupported, or when no core Gene adaptor can be obtained for the species.
sub run_script {
    my ($self, $ref_arg) = @_;

    my $source_id  = $ref_arg->{source_id};
    my $species_id = $ref_arg->{species_id};
    my $file       = $ref_arg->{file};
    my $verbose    = $ref_arg->{verbose};

    if ((!defined $source_id) or (!defined $species_id) or (!defined $file)) {
        croak "Need to pass source_id, species_id and file as pairs";
    }
    $verbose ||= 0;

    # Both captures stay undefined when the corresponding option is absent.
    my ($project) = $file =~ /project[=][>](\S+?)[,]/;
    my ($wget)    = $file =~ /wget[=][>](\S+?)[,]/;

    if (!defined $wget) {
        # Same outcome as a failed download, but with a message that names
        # the actual problem instead of an HTTP client error for an empty URL.
        warn("No wget=>URL, option found in '$file'");
        return 1;
    }

    # This file does not load LWP::UserAgent itself; load it here rather than
    # relying on another module having done so.
    require LWP::UserAgent;
    my $ua = LWP::UserAgent->new();
    $ua->timeout(10);
    $ua->env_proxy();

    my $response = $ua->get($wget);
    if (!$response->is_success()) {
        warn($response->status_line);
        return 1;
    }
    my @lines = split(/\n/, $response->content);

    my %species_id_to_names = $self->species_id2name();
    my $names               = $species_id_to_names{$species_id};

    my $contents_lookup = $self->_get_contents(\@lines, $verbose);
    my $active = $self->_is_active($contents_lookup, $names, $verbose);
    if (!$active) {
        return;
    }

    # Staging servers holding the core databases, per supported project.
    my %staging_servers_for = (
        ensembl => [
            {
                '-host' => 'ens-staging1',
                '-user' => 'ensro',
            },
            {
                '-host' => 'ens-staging2',
                '-user' => 'ensro',
            },
        ],
        ensemblgenomes => [
            {
                '-host' => 'mysql-eg-staging-1.ebi.ac.uk',
                '-port' => 4160,
                '-user' => 'ensro',
            },
            {
                '-host' => 'mysql-eg-staging-2.ebi.ac.uk',
                '-port' => 4275,
                '-user' => 'ensro',
            },
        ],
    );

    # The defined-check avoids an "uninitialized value" warning when the
    # project option is missing from $file.
    my $servers = defined $project ? $staging_servers_for{$project} : undef;
    if (!$servers) {
        die("Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
    }

    # Get stable ids from core and create xrefs.
    my $registry = "Bio::EnsEMBL::Registry";
    $registry->load_registry_from_multiple_dbs(@{$servers});

    # The first name registered for the species is used for the lookup.
    my $species_name = defined $names ? $names->[0] : undef;
    if (!defined $species_name) {
        croak "No species name known for species_id $species_id";
    }

    my $gene_adaptor = $registry->get_adaptor($species_name, 'core', 'Gene');
    if (!defined $gene_adaptor) {
        croak "Could not get a core Gene adaptor for species '$species_name'";
    }

    my $xref_count = 0;
    foreach my $gene (@{ $gene_adaptor->fetch_all() }) {
        my $gene_stable_id = $gene->stable_id;
        my $xref_id = $self->add_xref({ acc        => $gene_stable_id,
                                        label      => $gene_stable_id,
                                        source_id  => $source_id,
                                        species_id => $species_id,
                                        info_type  => "DIRECT" });

        # Only link and count xrefs that were actually created.
        next if !$xref_id;
        $self->add_direct_xref($xref_id, $gene_stable_id, 'gene', '');
        $xref_count++;
    }

    print "Added $xref_count DIRECT xrefs\n" if ($verbose);
    if (!$xref_count) {
        return 1;    # 1 error
    }

    return 0;        # successful
}
sub _get_contents {
my ($self, $files, $verbose) = @_;
my ($self, $lines, $verbose) = @_;
my @lines = @$lines;
my %lookup;
my $fh = $self->get_filehandle($files->[0]);
while(my $line = <$fh>) {
chomp $line;
foreach my $line (@lines) {
my ($species, $remainder) = $line =~ /^([a-z|A-Z]+)_(.+)$/;
croak "The line '$line' is not linked to a gene set. This is unexpected." if $remainder !~ /gene/;
$lookup{$species} = 1;
}
close ($fh);
if($verbose) {
printf("ArrayExpress is using the species [%s]\n", join(q{, }, keys %lookup));
}
......@@ -90,53 +172,5 @@ sub _is_active {
return $active;
}
#this method is called from XrefMapper/DirectXrefs.pm
# Create one ArrayExpress DIRECT gene xref for every stable id returned by
# the gene_stable_id query run through $self->dbi().
#
# Nothing is done unless meta_key() yields a key and the meta value stored
# under that key is true.
#
# Arguments:
#   $verbose - when true, print the number of xrefs that were loaded
#
# Returns nothing. A warning line is printed (regardless of $verbose) when
# the source is active but no xref could be created.
sub create_xrefs {
    my ($self, $verbose) = @_;

    my $array_xrefs_meta_key = $self->meta_key();
    return if !$array_xrefs_meta_key;

    my $active = $self->get_meta_value($array_xrefs_meta_key);
    return if !$active;

    # Create ArrayExpress direct xrefs.
    my $source_name = 'ArrayExpress';
    my $source_id   = $self->get_source_id_for_source_name($source_name);
    my $species_id  = $self->get_meta_value('species_id');

    # Get gene stable ids.
    my $gene_id_sth = $self->dbi()->prepare("select stable_id from gene_stable_id order by stable_id");
    $gene_id_sth->execute();

    my $gene_stable_id;
    $gene_id_sth->bind_columns(\$gene_stable_id);

    my $xref_count = 0;
    while ($gene_id_sth->fetch()) {
        my $xref_id = $self->add_xref({ acc        => $gene_stable_id,
                                        label      => $gene_stable_id,
                                        source_id  => $source_id,
                                        species_id => $species_id,
                                        info_type  => "DIRECT" });

        # Without an xref id there is nothing to link the stable id to, so
        # neither create the direct xref nor count it.
        next if !$xref_id;
        $self->add_direct_xref($xref_id, $gene_stable_id, 'gene', '');
        $xref_count++;
    }
    $gene_id_sth->finish();

    if ($xref_count > 0) {
        print "Loaded $xref_count $source_name DIRECT xrefs\n" if $verbose;
    }
    else {
        print "Warning: 0 $source_name DIRECT xrefs loaded even though $source_name is active for the species.\n";
    }

    return;
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment