Skip to content
Snippets Groups Projects
Commit 109e780d authored by Ian Longden's avatar Ian Longden
Browse files

New parsing of the description, removing RecName: etc.

parent a8922c2d
No related branches found
No related tags found
No related merge requests found
......@@ -301,17 +301,109 @@ sub create_xrefs {
@all_lines = split /\n/, $description_and_rest;
# extract ^DE lines only & build cumulative description string
my $description;
my $description = " ";
my $name = "";
my $flags = " ";
my $mode = "";
foreach my $line (@all_lines) {
my ($description_only) = $line =~ /^DE\s+(.+)/;
$description .= $description_only if ($description_only);
$description .= " ";
next if(!($line =~ /^DE/));
# Set up the mode first
if($line =~ /^DE RecName:/){
if($mode eq "RecName"){
$description .= ";";
}
$mode = "RecName";
}
elsif($line =~ /^DE SubName:/){
if($mode eq "RecName"){
$description .= ";";
}
$mode = "RecName";
}
elsif($line =~ /^DE AltName:/){
$mode = "AltName";
}
elsif($line =~ /^DE Contains:/){
if($mode eq "Contains"){
$description .= ";";
}
elsif($mode eq "Includes"){
$description .= "][Contains ";
}
else{
$description .= " [Contains ";
}
$mode = "Contains";
next;
}
elsif($line =~ /^DE Includes:/){
if($mode eq "Includes"){
$description .= ";";
}
elsif($mode eq "Contains"){
$description .= "][Includess";
}
else{
$description .= " [Includes ";
}
$mode = "Includes";
next;
}
elsif($line =~ /^DE Flags: (.*);/){
$flags .= "$1 ";
next;
}
# now get the data
if($line =~ /^DE RecName: Full=(.*);/){
$name .= $1;
}
elsif($line =~ /RecName: Full=(.*);/){
$description .= $1;
}
elsif($line =~ /SubName: Full=(.*);/){
$name .= $1;
}
elsif($line =~ /AltName: Full=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /Short=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /EC=(.*);/){
$description .= "(EC ".$1.")";
}
elsif($line =~ /Allergen=(.*);/){
$description .= "(Allergen ".$1.")";
}
elsif($line =~ /INN=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /Biotech=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /CD_antigen=(.*);/){
$description .= "(".$1." antigen)";
}
else{
print STDERR "unable to process *$line* for $acc\n";
}
}
if($mode eq "Contains" or $mode eq "Includes"){
$description .= "]";
}
$description =~ s/^\s*//g;
$description =~ s/\s*$//g;
$xref->{DESCRIPTION} = $description;
$xref->{DESCRIPTION} = $name.$flags.$description;
# extract sequence
my ($seq) = $_ =~ /SQ\s+(.+)/s; # /s allows . to match newline
......
......@@ -280,18 +280,105 @@ sub create_xrefs {
@all_lines = split /\n/, $description_and_rest;
# extract ^DE lines only & build cumulative description string
my $description;
my $description = " ";
my $name = "";
my $flags = " ";
my $mode = "";
foreach my $line (@all_lines) {
my ($description_only) = $line =~ /^DE\s+(.+)/;
$description .= $description_only if ($description_only);
$description .= " ";
next if(!($line =~ /^DE/));
# Set up the mode first
if($line =~ /^DE RecName:/){
if($mode eq "RecName"){
$description .= "; ";
}
$mode = "RecName";
}
elsif($line =~ /^DE SubName:/){
if($mode eq "RecName"){
$description .= "; ";
}
$mode = "RecName";
}
elsif($line =~ /^DE AltName:/){
$mode = "AltName";
}
elsif($line =~ /^DE Contains:/){
if($mode eq "Contains"){
$description .= "; ";
}
elsif($mode eq "Includes"){
$description .= "][Contains ";
}
else{
$description .= " [Contains ";
}
$mode = "Contains";
next;
}
elsif($line =~ /^DE Includes:/){
if($mode eq "Includes"){
$description .= "; ";
}
elsif($mode eq "Contains"){
$description .= "][Includess";
}
else{
$description .= " [Includes ";
}
$mode = "Includes";
next;
}
elsif($line =~ /^DE Flags: (.*);/){
$flags .= "$1 ";
next;
}
# now get the data
if($line =~ /^DE RecName: Full=(.*);/){
$name .= $1;
}
elsif($line =~ /RecName: Full=(.*);/){
$description .= $1;
}
elsif($line =~ /SubName: Full=(.*);/){
$name .= $1;
}
elsif($line =~ /AltName: Full=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /Short=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /EC=(.*);/){
$description .= "(EC ".$1.")";
}
elsif($line =~ /Allergen=(.*);/){
$description .= "(Allergen ".$1.")";
}
elsif($line =~ /INN=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /Biotech=(.*);/){
$description .= "(".$1.")";
}
elsif($line =~ /CD_antigen=(.*);/){
$description .= "(".$1." antigen)";
}
else{
print STDERR "unable to process *$line* for $acc\n";
}
}
if($mode eq "Contains" or $mode eq "Includes"){
$description .= "]";
}
$description =~ s/^\s*//g;
$description =~ s/\s*$//g;
$xref->{DESCRIPTION} = $description;
$xref->{DESCRIPTION} = $name.$flags.$description;
push @xrefs, $xref;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment