diff --git a/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm b/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm index ddeed0f930470e0665ef9416b2f1de2c18ef7f62..ce2d56e77b175ccd9e647b352b0b6cf25c08f155 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ProcessPaired.pm @@ -69,14 +69,8 @@ sub process{ #this query is used to check if and object_xref exists for the related translation and paired RefSeq_peptide% with a status of 'DUMP_OUT' my $ox_translation_sth = $self->xref->dbc->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY') and ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?"); - #this query is used to check if and object_xref exists for the related translation and paired RefSeq_peptide% with any status - my $ox_translation_other_status_sth = $self->xref->dbc->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?"); - - my $ox_insert_sth = $self->xref->dbc->prepare("insert into object_xref (object_xref_id, xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')"); - my $ox_update_sth = $self->xref->dbc->prepare("update object_xref set ox_status = 'DUMP_OUT', linkage_type = 'INFERRED_PAIR' where object_xref_id = ?"); - my $xref_sth = $self->xref->dbc->prepare("select xref_id from xref where accession = ? and source_id = ?"); my $xref_update_sth = $self->xref->dbc->prepare("update xref set info_type = 'INFERRED_PAIR' where xref_id = ?"); @@ -135,11 +129,10 @@ sub process{ $transcr_obj_xrefs_sth->finish(); $ox_translation_sth->finish(); - $ox_translation_other_status_sth->finish(); $ox_insert_sth->finish(); - $ox_update_sth->finish(); $xref_update_sth->finish(); $identity_update_sth->finish(); + $xref_sth->finish(); #go through RefSeq_peptide% object_xrefs $transl_object_xrefs_sth->execute(); diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm index b723c8dba1bcdab235a63a248241601aab2c3d5f..b855ec950ac605d9f84fae137f4894aa25137a12 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm @@ -201,7 +201,7 @@ sub run_script { my $sa = $core_dba->get_SliceAdaptor(); my $sa_of = $otherf_dba->get_SliceAdaptor(); - my $chromosomes_of = $sa_of->fetch_all('chromosome', undef, 1); + my $chromosomes_of = $sa_of->fetch_all('toplevel', undef, 1); # Fetch analysis object for refseq my $aa_of = $otherf_dba->get_AnalysisAdaptor(); @@ -250,7 +250,7 @@ sub run_script { } # Fetch slice in core database which overlaps refseq transcript - my $chromosome = $sa->fetch_by_region('chromosome', $chr_name, $transcript_of->seq_region_start, $transcript_of->seq_region_end); + my $chromosome = $sa->fetch_by_region('toplevel', $chr_name, $transcript_of->seq_region_start, $transcript_of->seq_region_end); my $transcripts = $chromosome->get_all_Transcripts(1); # Create a range registry for all the exons of the ensembl transcript diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm index 37da140c579c3bbe5cc547882466acfc5b1af2d1..5eeedd9f6704704bb702f204d98eb38cfeb18f63 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm @@ -142,8 +142,17 @@ sub create_xrefs { my %name2species_id = map{ $_=>$species_id } @names; my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; + # Retrieve existing RefSeq mRNA + my (%refseq_ids) = (%{ $self->get_valid_codes("RefSeq_mRNA", $species_id) }, %{ $self->get_valid_codes("RefSeq_mRNA_predicted", $species_id) }); + my (%entrez_ids) = %{ $self->get_valid_codes("EntrezGene", $species_id) }; + my (%wiki_ids) = %{ $self->get_valid_codes("WikiGene", $species_id) }; + + my %dependent_sources = $self->get_xref_sources(); + my $dbi = $self->dbi(); + my $add_dependent_xref_sth = $self->dbi->prepare("INSERT INTO dependent_xref (master_xref_id,dependent_xref_id, linkage_source_id) VALUES (?,?, $entrez_source_id)"); + my $refseq_io = $self->get_filehandle($file); if ( !defined $refseq_io ) { @@ -298,6 +307,21 @@ sub create_xrefs { $dep2{ACCESSION} = $ll; $dep2{LABEL} = $entrez{$ll}; push @{$xref->{DEPENDENT_XREFS}}, \%dep2; + + # Add xrefs for RefSeq mRNA as well where available + $refseq_pair =~ s/\.[0-9]*//; + if (defined $refseq_pair) { + if ($refseq_ids{$refseq_pair}) { + foreach my $refseq_id (@{ $refseq_ids{$refseq_pair} }) { + foreach my $entrez_id (@{ $entrez_ids{$ll} }) { + $add_dependent_xref_sth->execute($refseq_id, $entrez_id); + } + foreach my $wiki_id (@{ $wiki_ids{$ll} }) { + $add_dependent_xref_sth->execute($refseq_id, $wiki_id); + } + } + } + } } } diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini index 4959a2446c8f0fb8a1d57cbf2b05e983c6711482..767ac701738cb148f61aeab7afee0586f4af760e 100644 --- a/misc-scripts/xref_mapping/xref_config.ini +++ b/misc-scripts/xref_mapping/xref_config.ini @@ -2388,7 +2388,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/genomes/Astyanax_mexicanus/RNA/rna.gbk. # Used by saccharomyces_cerevisiae name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2398,7 +2398,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/fungi/fungi*.protein.gpf [source RefSeq_peptide::MULTI-Plants] name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2410,7 +2410,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/plant/plant*.protein.gpf # Used by phaeodactylum_tricornutum name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2422,7 +2422,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/complete/complete*.prote # Used by dictyostelium_discoideum name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2433,7 +2433,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/protozoa/protozoa*.prote # Used by caenorhabditis_elegans, ciona_savignyi, drosophila_melanogaster name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2444,7 +2444,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/invertebrate/invertebrat # Special source used in RefSeqGPFFParser. No species uses this source. name = RefSeq_peptide_predicted download = N -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2454,7 +2454,7 @@ release_uri = # Used by cavia_porcellus, erinaceus_europaeus, gorilla_gorilla, macaca_mulatta, monodelphis_domestica, myotis_lucifugus, ochotona_princeps, ornithorhynchus_anatinus, ictidomys_tridecemlineatus, tupaia_belangeri, loxodonta_africana, callithrix_jacchus name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser @@ -2465,7 +2465,7 @@ data_uri = ftp://ftp.ncbi.nih.gov/refseq/release/vertebrate_mammalian/ver # Used by anolis_carolinensis, ciona_intestinalis, ciona_savignyi, ficedula_albicollis, gasterosteus_aculeatus, oryzias_latipes, taeniopygia_guttata, takifugu_rubripes, xenopus_tropicalis name = RefSeq_peptide download = Y -order = 20 +order = 30 priority = 2 prio_descr = parser = RefSeqGPFFParser