Skip to content
Snippets Groups Projects
Commit 09ff91f0 authored by Ian Longden's avatar Ian Longden
Browse files

removed as never used and just confusing

parent b94a0f50
No related branches found
No related tags found
No related merge requests found
package XrefParser::HGNC_ENSGParser;
use strict;
use DBI;
use base qw( XrefParser::BaseParser );
# Parse file of HGNC records and assign direct xrefs
# All assumed to be linked to genes
sub run {
my ($self, $source_id, $species_id, $file) = @_;
my $hugo_io = $self->get_filehandle($file);
if ( !defined $hugo_io ) {
print "Could not open $file\n";
return 1;
}
my $line_count = 0;
my $xref_count = 0;
# becouse the direct mapping have no descriptions etc
# we have to steal these fromt he previous HGNC parser.
# This is why the order states this is after the other one.
# maybe 1091,1092 is not right maybe should use name = HGNC and priority = 30r4 ??
my %label;
my %version;
my %description;
my $dbi = $self->dbi();
#get the source ids for HGNC refseq, entrezgene and unitprot
my $sql = 'select source_id, priority_description from source where name like "HGNC"';
my $sth = $dbi->prepare($sql);
$sth->execute();
my ($hgnc_source_id, $desc);
$sth->bind_columns(\$hgnc_source_id, \$desc);
my @arr;
while($sth->fetch()){
push @arr, $hgnc_source_id;
}
$sth->finish;
$sql = "select accession, label, version, description from xref where source_id in (".join(", ",@arr).")";
print "$sql\n";;
$sth = $dbi->prepare($sql);
$sth->execute();
my ($acc, $lab, $ver);
my $hgnc_loaded_count = 0;
$sth->bind_columns(\$acc, \$lab, \$ver, \$desc);
while (my @row = $sth->fetchrow_array()) {
$label{$acc} = $lab;
$version{$acc} = $ver;
$description{$acc} = $desc;
$hgnc_loaded_count++;
}
$sth->finish;
if($hgnc_loaded_count == 0){
die "No point continuing no hgncs there\n";
}
print "$hgnc_loaded_count HGNC's to be used as labels\n";
my $ignore_count = 0;
my $ignore_examples ="";
my %acc;
while ( $_ = $hugo_io->getline() ) {
my ($hgnc, $stable_id) = split;
if(!defined($label{$hgnc})){
$ignore_count++;
if($ignore_count < 10){
$ignore_examples .= " ".$hgnc;
}
next;
}
if(!defined($acc{$hgnc})){
$acc{$hgnc} = 1;
my $version ="";
$line_count++;
my $xref_id = $self->add_xref($hgnc, $version{$hgnc} , $label{$hgnc}||$hgnc , $description{$hgnc}, $source_id, $species_id, "DIRECT");
$xref_count++;
$self->add_direct_xref($xref_id, $stable_id, "gene", "");
}
}
print "Parsed $line_count HGNC identifiers from $file, added $xref_count xrefs and $line_count direct_xrefs\n";
if($ignore_count){
print $ignore_count." ignoreed due to numbers no identifiers being no longer valid :- $ignore_examples\n";
}
$hugo_io->close();
return 0;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment