Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
d5ad4a4a
Commit
d5ad4a4a
authored
Feb 05, 2001
by
Emmanuel Mongin
Browse files
Script to postprocess pmatch, needs doc...
parent
8ae2a613
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
188 additions
and
0 deletions
+188
-0
misc-scripts/protein_match/process_pmach.pl
misc-scripts/protein_match/process_pmach.pl
+188
-0
No files found.
misc-scripts/protein_match/process_pmach.pl
0 → 100644
View file @
d5ad4a4a
#use strict;
use
Getopt::
Long
;
my
(
$ens
,
$sp
,
$refseq
);
&GetOptions
(
'
ens:s
'
=>\
$ens
,
'
sp:s
'
=>\
$sp
,
'
refseq:s
'
=>\
$refseq
);
&runpmatch
();
&postprocesspmatch
(
$sp
);
&postprocesspmatch
(
$refseq
);
&finalprocess
(
$sp
);
&finalprocess
(
$refseq
);
sub
runpmatch
{
print
STDERR
"
Running pmatch
\n
";
#Run pmatch and store the data in files which will be kept for debugging
my
$pmatch1
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch
$sp
$ens
> ens_sp_rawpmatch
";
my
$pmatch2
=
"
/nfs/griffin2/rd/bin.ALPHA/pmatch
$refseq
$ens
> ens_refseq_rawpmatch
";
system
(
$pmatch1
);
# == 0 or die "$0\Error running '$pmatch1' : $!";
system
(
$pmatch2
);
# == 0 or die "$0\Error running '$pmatch2' : $!";
}
sub
postprocesspmatch
{
my
(
$db
)
=
@_
;
my
%hash1
;
#open (OUT, ">../data/$db.processed");
#Post process the raw data from pmatch
if
(
$db
eq
$sp
)
{
print
STDERR
"
Postprocessing pmatch for SP mapping
\n
";
open
(
OUT
,
"
>ens_sp.processed
");
open
(
PROC
,
"
ens_sp_rawpmatch
");
}
else
{
print
STDERR
"
Postprocessing pmatch for REFSEQ mapping
\n
";
open
(
OUT
,
"
>ens_refseq.processed
");
open
(
PROC
,
"
ens_refseq_rawpmatch
")
||
die
"
Can't open file ens_refseq_rawpmatch
\n
";
}
while
(
<
PROC
>
)
{
## 20 Q9UN99 36 55 8.1 ENSP00000051351 63 82 7.8
my
(
$len
,
$id
,
$start
,
$end
,
$perc
,
$query
,
$qst
,
$qend
,
$qperc
)
=
split
;
#print STDERR "$id:$query";
my
$uniq
=
"
$id
:
$query
";
$hash1
{
$uniq
}
+=
$qperc
;
}
foreach
my
$key
(
keys
%hash1
)
{
(
$a
,
$b
)
=
split
(
/:/
,
$key
);
print
OUT
"
$a
\t
$b
\t
$hash1
{
$key
}
\n
";
}
close
(
OUT
);
}
sub
finalprocess
{
my
(
$db
)
=
@_
;
if
(
$db
eq
$sp
)
{
print
STDERR
"
Getting final mapping for SP mapping
\n
";
open
(
PROC
,
"
ens_sp.processed
");
open
(
OUT
,
"
>ens_sp.final
");
}
else
{
print
STDERR
"
Getting final mapping for REFSEQ mapping
\n
";
open
(
PROC
,
"
ens_refseq.processed
")
||
die
"
Can' open file ens_refseq.processed
\n
";
open
(
OUT
,
"
>refseq.final
");
}
my
%hash2
;
while
(
<
PROC
>
)
{
my
(
$ens
,
$known
,
$perc
)
=
split
;
#if ($perc > 100) {
# print "$ens\t$known\t$perc\n";
#}
if
(
!
defined
$hash2
{
$known
}
)
{
$hash2
{
$known
}
=
[]
;
}
$p
=
NamePerc
->
new
;
$p
->
name
(
$ens
);
$p
->
perc
(
$perc
);
push
(
@
{
$hash2
{
$known
}},
$p
);
}
foreach
my
$know
(
keys
%hash2
)
{
@array
=
@
{
$hash2
{
$know
}};
@array
=
sort
{
$b
->
perc
<=>
$a
->
perc
}
@array
;
$top
=
shift
@array
;
print
OUT
"
$know
\t
",
$top
->
name
,"
\t
",
$top
->
perc
,"
\t
PRIMARY
\n
";
foreach
$ens
(
@array
)
{
if
(
$ens
->
perc
>
$top
->
perc
)
{
die
"
Not good....
";
}
}
if
(
scalar
(
@array
)
>=
20
)
{
foreach
my
$repeat
(
@array
)
{
print
OUT
"
$know
\t
",
$repeat
->
name
,"
\t
",
$repeat
->
perc
,"
\t
REPEAT
\n
";
}
}
if
(
scalar
(
@array
)
<
20
)
{
foreach
my
$duplicate
(
@array
)
{
if
(
$duplicate
->
perc
+
1
>=
$top
->
perc
)
{
print
OUT
"
$know
\t
",
$duplicate
->
name
,"
\t
",
$duplicate
->
perc
,"
\t
DUPLICATE
\n
";
}
else
{
print
OUT
"
$know
\t
",
$duplicate
->
name
,"
\t
",
$duplicate
->
perc
,"
\t
PSEUDO
\n
";
}
}
}
}
close
(
OUT
);
}
package
NamePerc
;
sub
new
{
my
$class
=
shift
;
my
$self
=
{};
bless
$self
,
$class
;
return
$self
;
}
=head2 name
Title : name
Usage : $obj->name($newval)
Function:
Returns : value of name
Args : newvalue (optional)
=cut
sub
name
{
my
$obj
=
shift
;
if
(
@
_
)
{
my
$value
=
shift
;
$obj
->
{'
name
'}
=
$value
;
}
return
$obj
->
{'
name
'};
}
=head2 perc
Title : perc
Usage : $obj->perc($newval)
Function:
Returns : value of perc
Args : newvalue (optional)
=cut
sub
perc
{
my
$obj
=
shift
;
if
(
@
_
)
{
my
$value
=
shift
;
$obj
->
{'
perc
'}
=
$value
;
}
return
$obj
->
{'
perc
'};
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment