Skip to content
Snippets Groups Projects
Commit c6cbd736 authored by Ian Longden's avatar Ian Longden
Browse files

sort out synonym and deleted accession entries

parent f012b1aa
No related branches found
No related tags found
No related merge requests found
......@@ -6,22 +6,42 @@
# to dump this from an ensembl database use something like
# mysql -u ensro -h ecs2 -P 3365 -e "select x.dbprimary_acc, ox.ensembl_object_type, ox.ensembl_id from xref x, object_xref ox, external_db edb where edb.db_name like 'Uniprot/SWISSPROT' and x.external_db_id=edb.external_db_id and x.xref_id=ox.xref_id order by x.xref_id" homo_sapiens_core_25_34e > ensembl_swissprot_translations.txt
# ADDITIONALLY
# read in the synonym conversion. This is created with something like:-
#mysql -hecs4 -P3350 -uensadmin -pensembl -Dianl_xref_test -e"select x.accession, s.accession from synonym a, xref x, xref s where a.xref_id = x.xref_id and a.synonym_xref_id = s.xref_id " > xref_synonym.txt
# ALSO get the list of deleted accessions; there is no need to look at these.
# Download it with: wget http://us.expasy.org/txt/delac_tr.txt
# then remove the HTML rubbish and save the result.
use strict;
use Getopt::Long;
my ( $old, $new, $pass, $port, $dbname );
my ( $old, $new, $del, $syn, $pass, $port, $dbname );
my %deleted;
my %synonym;
GetOptions( "old=s", \$old,
"new=s", \$new);
"new=s", \$new,
"del=s", \$del,
"syn=s", \$syn);
if( !($old && $new) ) {
if( !($old && $new && $syn && $del) ) {
usage();
exit(1);
}
# read files into xref/object hashes
# TODO - more than one mapping?
get_deleted($del);
get_synonym($syn);
my ($new_x2e_r, $new_e2x_r) = read_mappings($new);
my ($old_x2e_r, $old_e2x_r) = read_mappings($old);
......@@ -31,6 +51,38 @@ check_non_translations($new_e2x_r);
compare($old_x2e_r, $new_x2e_r, "xref");
compare($old_e2x_r, $new_e2x_r, "ensembl_object");
# ----------------------------------------
# Load the list of deleted accessions into the file-level %deleted hash.
#
# Arguments:
#   $file - path to a text file holding one deleted accession per line.
#
# Side effects:
#   Sets $deleted{$accession} = 1 for every non-blank line of the file.
#
# Dies (with the OS error) if the file cannot be opened.
sub get_deleted {
  my ($file) = @_;

  # Three-argument open with a lexical handle: the file name can never be
  # interpreted as an open mode, and the handle cannot collide with the
  # bareword FILE handle used elsewhere in this script.
  open(my $fh, '<', $file) or die "Could not open $file: $!\n";

  while (my $line = <$fh>) {
    chomp $line;

    # The list is cleaned up by hand, so tolerate stray leading/trailing
    # whitespace (including a CR from DOS line endings); otherwise the
    # accession would never match an xref id during lookup.
    $line =~ s/^\s+|\s+$//g;

    # A blank line must not register the empty string as a deleted accession.
    next if $line eq '';

    $deleted{$line} = 1;
  }

  close $fh;
}
# ----------------------------------------
# Load the synonym conversion table into the file-level %synonym hash.
#
# Arguments:
#   $file - path to a whitespace-separated text file with two columns per
#           line; the first line of the file is discarded.
#
# Side effects:
#   For every data line, sets $synonym{<second column>} = <first column>,
#   i.e. the hash is keyed on the second column and yields the first.
#
# Dies (with the OS error) if the file cannot be opened.
sub get_synonym {
  my ($file) = @_;

  # Three-argument open with a lexical handle: the file name can never be
  # interpreted as an open mode, and the handle cannot collide with the
  # bareword FILE handle used elsewhere in this script.
  open(my $fh, '<', $file) or die "Could not open $file: $!\n";

  # Junk first line (presumably the column header written by the mysql
  # client -- confirm against the real dump).
  my $header = <$fh>;

  while (my $line = <$fh>) {
    # split ' ' skips leading whitespace and drops the trailing newline,
    # so no separate chomp is needed.
    my ($primary_acc, $synonym_acc) = split ' ', $line;

    # Skip blank or single-column lines rather than storing an entry
    # under an undefined key.
    next unless defined $synonym_acc;

    $synonym{$synonym_acc} = $primary_acc;
  }

  close $fh;
}
# ----------------------------------------
# Compare mappings keyed on xref
......@@ -117,6 +169,7 @@ sub usage {
print << "EOF";
Usage: compare_mapping.pl -old <old mapping file> -new <new mapping file>
-del <deleted acc list file> -syn <xref synonym file>
Mapping files should be in the following format:
......@@ -134,6 +187,8 @@ sub read_mappings {
my %xref_to_ensembl;
my %ensembl_to_xref;
my $del=0;
my $syn=0;
open (FILE, $filename) || die "Can't read $filename\n";
my $dummy = <FILE>; # skip first line
......@@ -142,9 +197,18 @@ sub read_mappings {
my ($xref_id, $type, $ensembl_id) = split;
# TODO - better way of handling type?
my $value = $ensembl_id . "." . $type;
push @{$xref_to_ensembl{$xref_id}}, $value;
push @{$ensembl_to_xref{$value}}, $xref_id;
if(!defined($deleted{$xref_id})){
if(defined($synonym{$xref_id})){
$xref_id = $synonym{$xref_id};
$syn++;
}
my $value = $ensembl_id . "." . $type;
push @{$xref_to_ensembl{$xref_id}}, $value;
push @{$ensembl_to_xref{$value}}, $xref_id;
}
else{
$del++;
}
}
......@@ -156,6 +220,8 @@ sub read_mappings {
}
print "Read $i mappings from $filename\n";
print "$del entries ignored due to deletions\n";
print "$syn entries changed to synonym values\n";
return (\%xref_to_ensembl, \%ensembl_to_xref);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment