Skip to content
Snippets Groups Projects
Commit d39f377b authored by Ian Longden's avatar Ian Longden
Browse files

V2 Illumina parser for mouse

parent 0ad40ad3
No related branches found
No related tags found
No related merge requests found
package XrefParser::IlluminaWGParser;
use strict;
use base qw( XrefParser::BaseParser );
# Parse an Illumina whole-genome probe annotation file and upload one xref
# per probe row.
#
# The file is made of sections introduced by a line starting with "[".
# Only the section whose marker starts with "[Probes" is read: the line
# right after the marker is a tab-separated header, and every following
# line (until the next "[" marker) is a tab-separated probe row.
#
# Arguments:
#   $self       - parser object; must provide get_filehandle()
#   $source_id  - stored as SOURCE_ID on every xref
#   $species_id - stored as SPECIES_ID on every xref
#   $file       - passed to get_filehandle()
#
# Returns 1 if the file handle could not be obtained, 0 otherwise.
# Dies if the probes header is missing or lacks one of the Search_Key,
# Probe_Sequence or Definition columns.
sub run {
    my ($self, $source_id, $species_id, $file) = @_;

    my @xrefs;

    my $file_io = $self->get_filehandle($file);
    if ( !defined $file_io ) {
        print "Could not open $file\n";
        return 1;
    }

    my $read = 0;    # true while we are inside the [Probes section
    my $name_index;
    my $seq_index;
    my $defin_index;

    # Test definedness (not truth) so a final line such as "0" without a
    # newline does not end the loop early, and keep the line in a lexical
    # instead of clobbering the global $_.
    while ( defined( my $line = $file_io->getline() ) ) {
        chomp $line;

        # strip ^M at end of line
        $line =~ s/\015//g;

        if ( $line =~ /^\[/ ) {
            print $line . "\n";

            if ( $line =~ /^\[Probes/ ) {
                my $header = $file_io->getline();
                if ( !defined $header ) {
                    die "Missing header line after $line in $file";
                }

                # The header needs the same line-ending clean-up as data
                # rows, otherwise the last column name keeps its "\r\n"
                # and can never match.
                chomp $header;
                $header =~ s/\015//g;
                print $header . "\n";

                $read = 1;

                # Forget indices from any earlier probes header so stale
                # positions are never applied to a different layout.
                ( $name_index, $seq_index, $defin_index ) = ();

                my @columns = split( "\t", $header );
                for my $index ( 0 .. $#columns ) {
                    my $head = $columns[$index];
                    if ( $head eq "Search_Key" ) {
                        $name_index = $index;
                    }
                    elsif ( $head eq "Probe_Sequence" ) {
                        $seq_index = $index;
                    }
                    elsif ( $head eq "Definition" ) {
                        $defin_index = $index;
                    }
                }

                if (   !defined($name_index)
                    or !defined($seq_index)
                    or !defined($defin_index) )
                {
                    # Spell out which columns are missing rather than
                    # interpolating undefined values.
                    my ( $name_txt, $seq_txt, $defin_txt ) =
                      map { defined $_ ? $_ : 'undef' }
                      ( $name_index, $seq_index, $defin_index );
                    die "Could not find index for search_key->$name_txt, seq->$seq_txt, definition->$defin_txt";
                }
            }
            else {
                # Any other section marker ends the probes section.
                $read = 0;
            }

            # Section markers are never data rows.
            next;
        }

        next if !$read;

        # Blank lines carry no probe; skip them instead of storing an xref
        # with an undefined accession.
        next if $line =~ /^\s*$/;

        my @bits        = split( "\t", $line );
        my $illumina_id = $bits[$name_index];

        # build the xref object and store it
        push @xrefs,
          {
            ACCESSION     => $illumina_id,
            LABEL         => $illumina_id,
            SEQUENCE      => $bits[$seq_index],
            SOURCE_ID     => $source_id,
            SPECIES_ID    => $species_id,
            DESCRIPTION   => $bits[$defin_index],
            SEQUENCE_TYPE => 'dna',
            STATUS        => 'experimental',
          };
    }

    $file_io->close();

    print scalar(@xrefs) . " Illumina V2 xrefs succesfully parsed\n";

    XrefParser::BaseParser->upload_xref_object_graphs( \@xrefs );

    return 0;
}
1;
......@@ -854,6 +854,17 @@ parser = FastaParser
release_uri =
data_uri = file:Illumina_V1/Mouse-6_V1.fa
# Source definition for the Illumina_V2 data set of mus_musculus.
[source Illumina_V2::mus_musculus]
# Used by mus_musculus
name = Illumina_V2
download = Y
order = 50
priority = 1
prio_descr =
# Parsed with IlluminaWGParser (presumably the XrefParser::IlluminaWGParser
# module - confirm against the parser directory).
parser = IlluminaWGParser
# No release URI is configured for this source.
release_uri =
# Input is a local file path given with the file: scheme.
data_uri = file:Illumina_V2/MouseWG-6_V2_0_R1_11278593_A.txt
[source Illumina_V2::homo_sapiens]
# Used by homo_sapiens
name = Illumina_V2
......@@ -2576,6 +2587,7 @@ source = InterproGO::MULTI
source = IMGT/GENE_DB::mus_musculus
source = IPI::mus_musculus
source = Illumina_V1::mus_musculus
source = Illumina_V2::mus_musculus
source = Interpro::MULTI
source = OTTT::MULTI
source = RefSeq_dna::mus_musculus
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment