Skip to content
Snippets Groups Projects
Commit f993d60a authored by Wojtek Bazant
Browse files

Code review from Tiago

parent e327bc83
No related branches found
No related tags found
6 merge requests: !296 C. elegans references use WormBase mapping to INSDC protein ids, !317 Xref parser mgiparser_ccds, !342 Feature/schema update 96, !342 Feature/schema update 96, !317 Xref parser mgiparser_ccds, !296 C. elegans references use WormBase mapping to INSDC protein ids
......@@ -167,10 +167,10 @@ sub create_xrefs {
my $type = $self->type_from_file($file);
return unless $type;
while ( $_ = $refseq_io->getline() ) {
while ( my $entry = $refseq_io->getline() ) {
my $xref = $self->xref_from_record(
$_,
$entry,
\%name2species_id, \%taxonomy2species_id,
$pred_mrna_source_id, $pred_ncrna_source_id,
$mrna_source_id, $ncrna_source_id,
......@@ -196,7 +196,7 @@ sub type_from_file {
return 'dna' if $file =~ /rna/;
return 'peptide' if $file =~ /protein/;
print STDERR "Could not work out sequence type for $file\n";
return undef;
return;
}
sub xref_from_record {
my ( $self, $entry, $name2species_id, $taxonomy2species_id,
......@@ -262,7 +262,7 @@ sub xref_from_record {
$description =~ s/\s+/ /g;
$description = substr($description, 0, 255) if (length($description) > 255);
my ($seq) = $_ =~ /^\s*ORIGIN\s+(.+)/ms; # /s allows . to match newline
my ($seq) = $entry =~ /^\s*ORIGIN\s+(.+)/ms; # /s allows . to match newline
my @seq_lines = split /\n/, $seq;
my $parsed_seq = "";
foreach my $x (@seq_lines) {
......
......@@ -15,7 +15,8 @@ See the License for the specific language governing permissions and
limitations under the License.
=cut
use strict;
use warnings;
package XrefParser::WormbaseCElegansBase;
sub swap_dependency {
......@@ -44,11 +45,20 @@ sub swap_dependency {
LINKAGE_SOURCE_ID => $source_id,
DEPENDENT_XREFS => undef,
};
my @other_dependents_as_dependents_here = map {{%$_,INFO_TYPE => "DEPENDENT", LINKAGE_SOURCE_ID => $source_id}} @other_dependents;
my @dependents_here = ({
%$xref, INFO_TYPE => "DEPENDENT",
LINKAGE_SOURCE_ID => $source_id,
DEPENDENT_XREFS => undef,
});
for my $d (@other_dependents){
push @dependents_here, {
%$d, INFO_TYPE => "DEPENDENT", LINKAGE_SOURCE_ID => $source_id,
};
}
push @result, {
%$matching_source_id_dependent,
LABEL=>undef, INFO_TYPE => "MISC",
DEPENDENT_XREFS => [$xref_as_dependent_here, @other_dependents_as_dependents_here]
DEPENDENT_XREFS =>\@dependents_here,
};
}
......
......@@ -18,21 +18,22 @@ limitations under the License.
package XrefParser::WormbaseCElegansRefSeqGPFFParser;
use strict;
use warnings;
use parent qw/XrefParser::WormbaseCElegansBase XrefParser::RefSeqGPFFParser/;
my $SOURCE_IDS;
my $PATTERN;
my $ACCESSION_FROM_ENTRY_PATTERN;
sub run {
my ($self, $arg_ref) = @_;
my $type = $self->type_from_file(@{$arg_ref->{files}});
if($type eq 'peptide'){
$SOURCE_IDS = [ $self->get_source_id_for_source_name('protein_id') ];
$PATTERN = qr/This record has been curated by WormBase. The\s+reference sequence is identical to (.*?)\./;
$ACCESSION_FROM_ENTRY_PATTERN = qr/This record has been curated by WormBase. The\s+reference sequence is identical to (.*?)\./;
} elsif ($type eq 'dna'){
$SOURCE_IDS = [
$self->get_source_id_for_source_name('wormbase_cds'),
$self->get_source_id_for_source_name('wormbase_transcript'),
];
$PATTERN = qr/standard_name="(.*?)"/;
$ACCESSION_FROM_ENTRY_PATTERN = qr/standard_name="(.*?)"/;
}
die %$arg_ref unless @$SOURCE_IDS;
return $self->SUPER::run($arg_ref);
......@@ -51,17 +52,17 @@ sub xref_from_record {
return &modify_xref_with_dependent(
$SOURCE_IDS, $entry,
$self->SUPER::xref_from_record($entry, @args),
$PATTERN,
$ACCESSION_FROM_ENTRY_PATTERN,
);
}
# Attach dependent xrefs to $xref, one per source id, using an accession
# extracted from the raw record text.
#
# Arguments (positional):
#   $source_ids            - array ref of source ids; one dependent xref is
#                            added for each element
#   $entry                 - raw text of the record
#   $xref                  - hash ref for the xref built from the record, or
#                            a false value when no xref was produced
#   $get_accession_pattern - compiled regex whose first capture group is the
#                            accession of the dependent xref
#
# Returns $xref (modified in place) on success. Returns nothing (undef in
# scalar context, empty list in list context) when $xref is false, when the
# pattern does not match $entry, or when the captured accession is false.
sub modify_xref_with_dependent {
    my ($source_ids, $entry, $xref, $get_accession_pattern) = @_;

    return unless $xref;

    # Match once, in list context, so the capture lands in a named lexical
    # rather than in $1, which any later successful match would overwrite.
    my ($accession) = $entry =~ $get_accession_pattern;
    return unless $accession;

    # Append to any dependents already present; create the list if absent.
    $xref->{DEPENDENT_XREFS} //= [];

    # Exactly one dependent per source id. The unary plus makes Perl parse
    # the inner braces as an anonymous hash rather than a block.
    push @{ $xref->{DEPENDENT_XREFS} },
        map { +{ ACCESSION => $accession, SOURCE_ID => $_ } } @{$source_ids};

    return $xref;
}
1;
......@@ -15,7 +15,8 @@ See the License for the specific language governing permissions and
limitations under the License.
=cut
use strict;
use warnings;
package XrefParser::WormbaseCElegansUniProtParser;
# UniProt xrefs are sometimes - really - dependent xrefs of
......@@ -28,7 +29,7 @@ package XrefParser::WormbaseCElegansUniProtParser;
# INSDC entries have coordinates, and UniProt entries don't.
# So for perfect homologs, there can be many INSDC entries per UniProt.
use parent XrefParser::WormbaseCElegansBase, XrefParser::UniProtParser;
use parent qw/XrefParser::WormbaseCElegansBase XrefParser::UniProtParser/;
sub upload_xref_object_graphs {
my ($self, $xrefs, $dbi) = @_;
......
......@@ -82,12 +82,7 @@ sub run {
}
sub get_data {
my ($self, $file) = @_;
my $pep_io = $self->get_filehandle($file);
if ( !defined $pep_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 error
}
my $pep_io = $self->get_filehandle($file) or croak "Could not open: $file";
my $data = {};
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment