diff --git a/misc-scripts/xref_mapping/XrefParser/IlluminaWGParser.pm b/misc-scripts/xref_mapping/XrefParser/IlluminaWGParser.pm
new file mode 100644
index 0000000000000000000000000000000000000000..12bb2fdf9b1419f71fae191137b8bd8a9f3d3323
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/IlluminaWGParser.pm
@@ -0,0 +1,134 @@
+package XrefParser::IlluminaWGParser;
+
+# Parser for Illumina whole-genome probe annotation files: tab-separated text
+# split into sections, each introduced by a line starting with "[". Only rows
+# of the "[Probes" section are turned into xrefs.
+
+use strict;
+use warnings;
+
+use base qw( XrefParser::BaseParser );
+
+
+# Returns a copy of $line without its trailing newline and with every
+# carriage return (^M) removed.
+sub _clean_line {
+  my ($line) = @_;
+  chomp $line;
+  $line =~ s/\015//g;
+  return $line;
+}
+
+
+# run( $source_id, $species_id, $file )
+#
+# Reads $file, builds one xref hash per usable row of each "[Probes" section
+# and passes the list to XrefParser::BaseParser->upload_xref_object_graphs().
+#
+#   $source_id  - stored as SOURCE_ID on every xref
+#   $species_id - stored as SPECIES_ID on every xref
+#   $file       - handed to $self->get_filehandle() to obtain the input handle
+#
+# Returns 1 if no filehandle could be obtained, otherwise 0 after the upload
+# call. Dies if the header row of a "[Probes" section lacks any of the
+# Search_Key, Probe_Sequence or Definition columns.
+sub run {
+
+  my ($self, $source_id, $species_id, $file) = @_;
+
+  my @xrefs;
+
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print "Could not open $file\n";
+    return 1;
+  }
+
+  my $read = 0;    # true while the current line is inside a "[Probes" section
+  my ($name_index, $seq_index, $defin_index);
+
+  # Test with defined() so a last line holding just "0" does not end the loop
+  # early, and read into a lexical instead of clobbering the global $_.
+  while ( defined( my $line = $file_io->getline() ) ) {
+    $line = _clean_line($line);
+
+    if ( $line =~ /^\[/ ) {
+      print $line."\n";
+
+      if ( $line !~ /^\[Probes/ ) {
+        # Any other section marker ends the probe rows.
+        $read = 0;
+        next;
+      }
+
+      # The line following the section marker holds the column names. Its
+      # line ending must be stripped too, otherwise the last column name
+      # still carries "\n" (or "\r\n") and can never match below. A missing
+      # header (end of file) is treated as empty so the check below dies.
+      my $header = $file_io->getline();
+      $header = defined $header ? _clean_line($header) : '';
+      print $header."\n";
+
+      # Forget indices from an earlier "[Probes" section so a stale value
+      # cannot mask a column missing from this one.
+      ($name_index, $seq_index, $defin_index) = (undef, undef, undef);
+
+      my @heads = split("\t", $header);
+      foreach my $index ( 0 .. $#heads ) {
+        my $head = $heads[$index];
+        if ( $head eq "Search_Key" ) {
+          $name_index = $index;
+        }
+        elsif ( $head eq "Probe_Sequence" ) {
+          $seq_index = $index;
+        }
+        elsif ( $head eq "Definition" ) {
+          $defin_index = $index;
+        }
+      }
+
+      if ( !defined($name_index) or !defined($seq_index) or !defined($defin_index) ) {
+        # Missing indices are shown as empty strings.
+        die sprintf(
+          "Could not find index for search_key->%s, seq->%s, definition->%s",
+          map { defined $_ ? $_ : '' } ( $name_index, $seq_index, $defin_index )
+        );
+      }
+
+      $read = 1;
+      next;
+    }
+
+    next if !$read;
+
+    my @bits = split("\t", $line);
+    my $illumina_id = $bits[$name_index];
+
+    # Blank lines and rows with no Search_Key value give no accession, so
+    # skip them rather than upload an xref without one.
+    next if !defined($illumina_id) || $illumina_id eq '';
+
+    # build the xref object and store it
+    push @xrefs, {
+      ACCESSION     => $illumina_id,
+      LABEL         => $illumina_id,
+      SEQUENCE      => $bits[$seq_index],
+      SOURCE_ID     => $source_id,
+      SPECIES_ID    => $species_id,
+      DESCRIPTION   => $bits[$defin_index],
+      SEQUENCE_TYPE => 'dna',
+      STATUS        => 'experimental',
+    };
+  }
+
+  $file_io->close();
+
+  print scalar(@xrefs) . " Illumina V2 xrefs succesfully parsed\n";
+
+  XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs);
+
+  return 0;
+}
+
+1;
diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini
index f48fc3ba6190812638110411a9fcf77669b2231f..1961e3b3a3f157384171523b7fd65ec34084776b 100644
--- a/misc-scripts/xref_mapping/xref_config.ini
+++ b/misc-scripts/xref_mapping/xref_config.ini
@@ -854,6 +854,17 @@ parser = FastaParser
 release_uri =
 data_uri = file:Illumina_V1/Mouse-6_V1.fa
 
+[source Illumina_V2::mus_musculus]
+# Used by mus_musculus
+name = Illumina_V2
+download = Y
+order = 50
+priority = 1
+prio_descr =
+parser = IlluminaWGParser
+release_uri =
+data_uri = file:Illumina_V2/MouseWG-6_V2_0_R1_11278593_A.txt
+
 [source Illumina_V2::homo_sapiens]
 # Used by homo_sapiens
 name = Illumina_V2
@@ -2576,6 +2587,7 @@ source = InterproGO::MULTI
 source = IMGT/GENE_DB::mus_musculus
 source = IPI::mus_musculus
 source = Illumina_V1::mus_musculus
+source = Illumina_V2::mus_musculus
 source = Interpro::MULTI
 source = OTTT::MULTI
 source = RefSeq_dna::mus_musculus