Skip to content
Snippets Groups Projects
Commit e4910c72 authored by Emmanuel Mongin
Browse files

update

parent c91a8b1e
No related branches found
No related tags found
No related merge requests found
......@@ -217,9 +217,7 @@ if ($organism eq "human") {
#It's a curated Refseq, flag it as predicted
print OUT "$prot_ac\tRefSeq_pred\t$prot_ac\tRefSeq_pred\t$prot_ac\t\tPRED\n";
my ($mim) = $_ =~ /\/db_xref=\"MIM:(\d+)/;
my ($locus) = $_ =~ /\/db_xref=\"LocusID:(\d*)/;
}
close (REFSEQ);
......@@ -232,7 +230,8 @@ if ($organism eq "human") {
while (<GO>) {
chomp;
my @array = split (/\t/,$_);
print OUT "$array[1]\tSPTR\t$array[4]\tGO\t$array[4]\t\tXREF\n";
$array[9] =~ s/\'/\\\'/g;
print OUT "$array[1]\tSPTR\t$array[4]\tGO\t$array[4]\t$array[9]\tXREF\n";
}
}
......
......@@ -78,11 +78,11 @@ if ($organism = "human") {
open (REFSEQPRED,"$refseq_pred") || die "Can't open $refseq_pred\n";
#Read the file by genbank entries (separated by //)
$/ = "\/\/\n";
while (<REFSEQ>) {
while (<REFSEQPRED>) {
#This subroutine stores, for each NP (RefSeq protein accession number), its corresponding NM (DNA accession number)
my ($prot_ac) = $_ =~ /ACCESSION\s+(\S+)/;
my ($dna_ac) = $_ =~ /DBSOURCE REFSEQ: accession\s+(\w+)/;
#print STDERR "PROT: $prot_ac\t$dna_ac\n";
$ref_map_pred{$prot_ac} = $dna_ac;
}
#Put back the default input record separator (newline) for reading files
......@@ -138,6 +138,7 @@ while (<XMAP>) {
($xac) = $xac =~ /^(XP_\d+)/;
$xac = $ref_map_pred{$xac};
#print STDERR "XAC: $xac\n";
}
if ($xid =~ /^XP_\d+/) {
......@@ -181,7 +182,7 @@ MAPPING: while (<MAP>) {
my $m = $tid;
print STDERR "$queryid,$tid,$tag,$queryperc,$targetperc\n";
#print STDERR "$queryid,$tid,$tag,$queryperc,$targetperc\n";
if ($tid =~ /^NP_\d+/) {
......@@ -198,7 +199,9 @@ MAPPING: while (<MAP>) {
if ($tid =~ /^(\w+-\d+)/) {
($tid) = $tid =~ /^(\w+)-\d+/;
}
#print STDERR "TID: $tid\n";
if ((defined $tid) && (defined $map{$tid})) {
......
......@@ -58,6 +58,8 @@ my $pmatch = $conf{'pmatch'};
my $organism = $conf{'organism'};
my $refseq_pred = $conf{'refseq_pred_fa'};
$protfile = "test.fa";
my $protfile1 = "test_pred.fa";
if (($organism eq "human") || ($organism eq "mouse")) {
&parse_refseq;
......@@ -67,7 +69,7 @@ if ($organism eq "human") {
&parse_refseq_pred;
}
&parse_sptr;
#&parse_sptr;
&test_protfile;
......@@ -104,7 +106,7 @@ sub parse_sptr {
sub parse_refseq {
open (IN, "<$refseq") or die "Can't open $refseq\n";
open (OUT, ">$protfile") or die "Can't open $protfile\n";
open (OUT, ">>$protfile") or die "Can't open $protfile\n";
while(<IN>){
# eg >gi|4501893|ref|NP_001094.1| actinin, alpha 2 [Homo sapiens]
......@@ -130,7 +132,7 @@ sub parse_refseq {
sub parse_refseq_pred {
open (IN, "<$refseq_pred") or die "Can't open $refseq_pred\n";
open (OUT, ">$protfile") or die "Can't open $protfile\n";
open (OUT, ">>$protfile1") or die "Can't open $protfile\n";
while(<IN>){
# eg >gi|4501893|ref|NP_001094.1| actinin, alpha 2 [Homo sapiens]
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment