Skip to content
Snippets Groups Projects
Commit b9458db0 authored by Ian Longden's avatar Ian Longden
Browse files

CCDS -> Ensembl transcript is not a one to one mapping so cannot use a key...

The CCDS -> Ensembl transcript mapping is not one-to-one, so a hash of simple key/value pairs cannot be used. The hash now uses an anonymous array per key to store multiple values.
parent 52e726a1
No related branches found
No related tags found
No related merge requests found
......@@ -90,7 +90,7 @@ sub run_script {
$sth->execute() or croak( $dbi2->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
if(defined($trans_id_to_stable_id{$row[0]})){
$ccds_to_stable_id{$row[1]} = $trans_id_to_stable_id{$row[0]};
push @{$ccds_to_stable_id{$row[1]}}, $trans_id_to_stable_id{$row[0]};
}
else{
print "NO transcript_stable_id for for ".$row[0]."\n";
......@@ -145,13 +145,19 @@ sub run_script {
my $xref_count = 0;
my $no_ccds_to_hgnc = 0;
my $direct_count = 0;
foreach my $ccds (keys %ccds_to_stable_id){
if(defined($ccds_to_hgnc{$ccds})){
my $hgnc = $ccds_to_hgnc{$ccds};
$hgnc =~ s/HGNC://;
my $xref_id = $self->add_xref($hgnc, $version{$hgnc} , $label{$hgnc}||$hgnc ,
$description{$hgnc}, $source_id, $species_id, "DIRECT");
$self->add_direct_xref($xref_id, $ccds_to_stable_id{$ccds}, "Transcript", "");
foreach my $stable_id (@{$ccds_to_stable_id{$ccds}}){
$self->add_direct_xref($xref_id, $stable_id, "Transcript", "");
$direct_count++;
}
$xref_count++;
if(defined($syn_hash->{$hgnc})){
......@@ -168,7 +174,7 @@ sub run_script {
}
}
$add_syn_sth->finish;
print "$no_ccds_to_hgnc missed as no hgnc for the ccds. Added $xref_count HGNC xrefs via CCDS\n" if($verbose);
print "$no_ccds_to_hgnc missed as no hgnc for the ccds. Added $xref_count HGNC xrefs via CCDS and $direct_count direct xrefs\n" if($verbose);
return 0;
}
......
......@@ -73,7 +73,7 @@ sub run_script {
my %ensembl_type;
my %old_xref;
while (my @row = $sth->fetchrow_array()) {
$ensembl_stable_id{$access} = $stable_id;
push @{$ensembl_stable_id{$access}}, $stable_id;
$ensembl_type{$access} = $type;
$old_xref{$access} = $old_xref_id;
}
......@@ -83,9 +83,13 @@ sub run_script {
my $line_count = 0;
my $xref_count = 0;
my $direct_count = 0;
my %seen;
my %old_to_new;
#
# dbi2 is the ccds database
#
my $dbi2 = $self->dbi2($host, $port, $user, $dbname, $pass);
if(!defined($dbi2)){
return 1;
......@@ -97,8 +101,9 @@ sub run_script {
# get ccds -> xref transcript_id ensembl_stable_id{CCDS1} = ENST00001
# get ccds -> internal transcript_id ccds_to_internal_id{CCDS1} = 12345
$sql = 'select x.dbprimary_acc, ox.ensembl_id from xref x, object_xref ox, external_db e where x.xref_id = ox.xref_id and x.external_db_id = e.external_db_id and e.db_name like ?';
$sql = 'select x.dbprimary_acc, ox.ensembl_id from xref x, object_xref ox, external_db e where x.xref_id = ox.xref_id and x.external_db_id = e.external_db_id and e.db_name like ? order by x.version';
# order by version added so that the hash gets overwritten with the latest version.
# calculate internal_id -> xref transcript_id
......@@ -141,7 +146,11 @@ sub run_script {
my $xref_id = $self->add_xref($refseq, $version{$refseq} , $label{$refseq}||$refseq ,
$description{$refseq}, $new_source_id, $species_id, "DIRECT");
$self->add_direct_xref($xref_id, $internal_to_stable_id{$internal_id}, "Transcript", "");
foreach my $stable_id (@{$internal_to_stable_id{$internal_id}}){
$self->add_direct_xref($xref_id, $stable_id, "Transcript", "");
$direct_count++;
}
$old_to_new{$old_xref{$refseq}} = $xref_id;
$xref_count++;
......@@ -166,7 +175,7 @@ sub run_script {
}
print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $xref_count direct_xrefs from $line_count lines.\n" if ($verbose);
print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $direct_count direct_xrefs from $line_count lines.\n" if ($verbose);
return 0;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment