# Prepare (but do not yet execute) the query that pairs transcript ids with
# the EMBL id of a clone, joining
# transcript -> exon_transcript -> exon -> contig -> clone.
my $sth = $ensdb->prepare(
    "select t.id,cl.embl_id from transcript as t, exon_transcript as et, clone as cl, contig as c, exon as e where t.id=et.transcript and et.exon = e.id and e.contig = c.internal_id and c.clone = cl.internal_id"
);
@@ -26,6 +26,9 @@ my ($mapping,$xrefs,$dbmap,$refseq,$out);
# File-level lookup tables.
#   %hash     : source accession -> list of "db:id" cross-reference strings
#               (non-ENSEMBL xref lines)
#   %ens2embl : source accession -> list of target ids (ENSEMBL xref lines)
#   %sp2embl  : source accession -> list of EMBL ids (SP xref lines whose
#               target database is EMBL)
#   %map, %ref_map : NOTE(review): populated outside the code visible here --
#               confirm their contents before relying on them.
my (%map, %hash, %ref_map, %ens2embl, %sp2embl);
&GetOptions(
...
...
@@ -69,13 +72,23 @@ while (<XREF>) {
#SP P31946 EMBL X57346
# Split the current tab-separated xref line into four fields, in the order
# shown by the sample line above: source database, source accession,
# target database, target identifier.
# NOTE(review): $id keeps any trailing newline unless the line was chomped
# earlier in this loop -- confirm.
my ($xrdb, $xrac, $db, $id) = split(/\t/, $_);

# Non-ENSEMBL lines: remember the cross-reference as "db:id" under the
# source accession. The push is done exactly once per line; the array
# reference is autovivified, so no explicit initialisation is needed.
if ($xrdb ne "ENSEMBL") {
    my $both = "$db:$id";
    push(@{ $hash{$xrac} }, $both);
}

# ENSEMBL lines: collect the target identifiers per source accession.
if ($xrdb eq "ENSEMBL") {
    push(@{ $ens2embl{$xrac} }, $id);
}

# SP lines pointing at EMBL: collect the EMBL identifiers per SP accession.
if (($xrdb eq "SP") && ($db eq "EMBL")) {
    push(@{ $sp2embl{$xrac} }, $id);
}
}
while(<MAP>){
...
...
@@ -83,13 +96,33 @@ while (<MAP>) {
#P01111 COBP00000000001 100 PRIMARY
my($xr,$ens,$perc,$tag)=split(/\t/,$_);
if($tageq"PRIMARY"){
if(($tageq"PRIMARY")||($tageq"DUPLICATE")){
#This is a hack and another solution will have to be found: if the external known gene is a RefSeq protein accession number, get back the equivalent RefSeq DNA accession number
#If less than 20: a hit whose percentage of identity is close to that of the PRIMARY is labelled DUPLICATE, otherwise it is labelled PSEUDO. DUPLICATEs can also be used for the mapping