Skip to content
Snippets Groups Projects
Commit 3ac96484 authored by Ian Longden's avatar Ian Longden
Browse files

Added the new RefSeq sources RefSeq_mRNA and RefSeq_ncRNA (and their _predicted variants)

parent 68041e25
No related branches found
No related tags found
No related merge requests found
...@@ -19,26 +19,40 @@ sub run { ...@@ -19,26 +19,40 @@ sub run {
my $files_ref = shift; my $files_ref = shift;
my $rel_file = shift; my $rel_file = shift;
$verbose = shift; $verbose = shift;
my @files = @{$files_ref}; my @files = @{$files_ref};
my $peptide_source_id = my $peptide_source_id =
$self->get_source_id_for_source_name('RefSeq_peptide'); $self->get_source_id_for_source_name('RefSeq_peptide');
my $dna_source_id = my $dna_source_id =
$self->get_source_id_for_source_name('RefSeq_dna'); $self->get_source_id_for_source_name('RefSeq_dna');
my $mrna_source_id =
$self->get_source_id_for_source_name('RefSeq_mRNA');
my $ncrna_source_id =
$self->get_source_id_for_source_name('RefSeq_ncRNA');
print "RefSeq_peptide source ID = $peptide_source_id\n" if($verbose);
print "RefSeq_dna source ID = $dna_source_id\n" if($verbose);
my $pred_peptide_source_id = my $pred_peptide_source_id =
$self->get_source_id_for_source_name('RefSeq_peptide_predicted'); $self->get_source_id_for_source_name('RefSeq_peptide_predicted');
my $pred_dna_source_id = my $pred_dna_source_id =
$self->get_source_id_for_source_name('RefSeq_dna_predicted'); $self->get_source_id_for_source_name('RefSeq_dna_predicted');
my $pred_mrna_source_id =
print "RefSeq_peptide_predicted source ID = " $self->get_source_id_for_source_name('RefSeq_mRNA_predicted');
. "$pred_peptide_source_id\n" if($verbose); my $pred_ncrna_source_id =
print "RefSeq_dna_predicted source ID = $pred_dna_source_id\n" if($verbose); $self->get_source_id_for_source_name('RefSeq_ncRNA_predicted');
if($verbose){
print "RefSeq_peptide source ID = $peptide_source_id\n";
print "RefSeq_dna source ID = $dna_source_id\n";
print "RefSeq_mRNA source ID = $mrna_source_id\n";
print "RefSeq_ncRNA source ID = $ncrna_source_id\n";
print "RefSeq_peptide_predicted source ID = $pred_peptide_source_id\n";
print "RefSeq_dna_predicted source ID = $pred_dna_source_id\n" ;
print "RefSeq_mRNA_predicted source ID = $pred_mrna_source_id\n" ;
print "RefSeq_ncRNA_predicted source ID = $pred_ncrna_source_id\n" ;
}
my @xrefs; my @xrefs;
foreach my $file (@files) { foreach my $file (@files) {
...@@ -51,6 +65,8 @@ sub run { ...@@ -51,6 +65,8 @@ sub run {
$dna_source_id, $dna_source_id,
$pred_peptide_source_id, $pred_peptide_source_id,
$pred_dna_source_id, $pred_dna_source_id,
$mrna_source_id, $ncrna_source_id,
$pred_mrna_source_id, $pred_ncrna_source_id,
$file, $file,
$species_id ); $species_id );
...@@ -83,8 +99,12 @@ s/.*(NCBI Reference Sequence.*) Distribution Release Notes.*/$1/s; ...@@ -83,8 +99,12 @@ s/.*(NCBI Reference Sequence.*) Distribution Release Notes.*/$1/s;
$self->set_release( $source_id, $release ); $self->set_release( $source_id, $release );
$self->set_release( $peptide_source_id, $release ); $self->set_release( $peptide_source_id, $release );
$self->set_release( $dna_source_id, $release ); $self->set_release( $dna_source_id, $release );
$self->set_release( $mrna_source_id, $release );
$self->set_release( $ncrna_source_id, $release );
$self->set_release( $pred_peptide_source_id, $release ); $self->set_release( $pred_peptide_source_id, $release );
$self->set_release( $pred_dna_source_id, $release ); $self->set_release( $pred_dna_source_id, $release );
$self->set_release( $pred_mrna_source_id, $release );
$self->set_release( $pred_ncrna_source_id, $release );
} }
return 0; # successfull return 0; # successfull
...@@ -102,7 +122,8 @@ sub create_xrefs { ...@@ -102,7 +122,8 @@ sub create_xrefs {
my $self = shift; my $self = shift;
my ( $peptide_source_id, $dna_source_id, $pred_peptide_source_id, my ( $peptide_source_id, $dna_source_id, $pred_peptide_source_id,
$pred_dna_source_id, $file, $species_id ) = @_; $pred_dna_source_id, $mrna_source_id, $ncrna_source_id,
$pred_mrna_source_id, $pred_ncrna_source_id, $file, $species_id ) = @_;
# Create a hash of all valid names for this species # Create a hash of all valid names for this species
my %species2name = $self->species_id2name(); my %species2name = $self->species_id2name();
...@@ -153,8 +174,14 @@ sub create_xrefs { ...@@ -153,8 +174,14 @@ sub create_xrefs {
$xref->{SEQUENCE_TYPE} = 'dna'; $xref->{SEQUENCE_TYPE} = 'dna';
$xref->{STATUS} = 'experimental'; $xref->{STATUS} = 'experimental';
my $source_id; my $source_id;
if ($acc =~ /^XM_/ || $acc =~ /^XR/) { if ($acc =~ /^XM_/ ){
$source_id = $pred_dna_source_id; $source_id = $pred_mrna_source_id;
} elsif( $acc =~ /^XR/) {
$source_id = $pred_ncrna_source_id;
} elsif( $acc =~ /^NM/) {
$source_id = $mrna_source_id;
} elsif( $acc =~ /^NR/) {
$source_id = $ncrna_source_id;
} else { } else {
$source_id = $dna_source_id; $source_id = $dna_source_id;
} }
......
...@@ -35,7 +35,15 @@ sub run_script { ...@@ -35,7 +35,15 @@ sub run_script {
$pass = $1; $pass = $1;
} }
my $dna_pred = XrefParser::BaseParser->get_source_id_for_source_name("RefSeq_dna_predicted"); # my $dna_pred = XrefParser::BaseParser->get_source_id_for_source_name("RefSeq_dna_predicted");
my $mrna_source_id =
$self->get_source_id_for_source_name('RefSeq_mRNA');
my $ncrna_source_id =
$self->get_source_id_for_source_name('RefSeq_ncRNA');
my $pred_mrna_source_id =
$self->get_source_id_for_source_name('RefSeq_mRNA_predicted');
my $pred_ncrna_source_id =
$self->get_source_id_for_source_name('RefSeq_ncRNA_predicted');
# because the direct mappings have no descriptions etc # because the direct mappings have no descriptions etc
# we have to steal these from the previous RefSeq parser. # we have to steal these from the previous RefSeq parser.
...@@ -45,7 +53,7 @@ sub run_script { ...@@ -45,7 +53,7 @@ sub run_script {
my %description; my %description;
my $dbi = $self->dbi(); my $dbi = $self->dbi();
my $sql = "select xref.accession, xref.label, xref.version, xref.description from xref, source where xref.source_id = source.source_id and source.name = 'RefSeq_dna'"; my $sql = "select xref.accession, xref.label, xref.version, xref.description from xref, source where xref.source_id = source.source_id and source.name like 'RefSeq_%RNA'";
my $sth = $dbi->prepare($sql); my $sth = $dbi->prepare($sql);
$sth->execute(); $sth->execute();
my ($acc, $lab, $ver, $desc); my ($acc, $lab, $ver, $desc);
...@@ -140,9 +148,18 @@ sub run_script { ...@@ -140,9 +148,18 @@ sub run_script {
if(!defined($seen{$refseq})){ if(!defined($seen{$refseq})){
$seen{$refseq} = 1; $seen{$refseq} = 1;
my $new_source_id = $source_id; my $new_source_id = $source_id;
if($refseq =~ /^XM/){ if ($refseq =~ /^XM_/ ){
$new_source_id = $dna_pred; $new_source_id = $pred_mrna_source_id;
} elsif( $refseq =~ /^XR/) {
$new_source_id = $pred_ncrna_source_id;
} elsif( $refseq =~ /^NM/) {
$new_source_id = $mrna_source_id;
} elsif( $refseq =~ /^NR/) {
$new_source_id = $ncrna_source_id;
} }
# if($refseq =~ /^XM/){
# $new_source_id = $dna_pred;
# }
my $xref_id = $self->add_xref($refseq, $version{$refseq} , $label{$refseq}||$refseq , my $xref_id = $self->add_xref($refseq, $version{$refseq} , $label{$refseq}||$refseq ,
$description{$refseq}, $new_source_id, $species_id, "DIRECT"); $description{$refseq}, $new_source_id, $species_id, "DIRECT");
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment