diff --git a/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm index 7e0811051f7d844301deb7f0ca74417be37ec5df..64106e58c1a1eb77e825a56666b9f793ffe97aad 100644 --- a/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/HGNC_CCDSParser.pm @@ -90,7 +90,7 @@ sub run_script { $sth->execute() or croak( $dbi2->errstr() ); while ( my @row = $sth->fetchrow_array() ) { if(defined($trans_id_to_stable_id{$row[0]})){ - $ccds_to_stable_id{$row[1]} = $trans_id_to_stable_id{$row[0]}; + push @{$ccds_to_stable_id{$row[1]}}, $trans_id_to_stable_id{$row[0]}; } else{ print "NO transcript_stable_id for for ".$row[0]."\n"; @@ -145,13 +145,19 @@ sub run_script { my $xref_count = 0; my $no_ccds_to_hgnc = 0; + my $direct_count = 0; foreach my $ccds (keys %ccds_to_stable_id){ if(defined($ccds_to_hgnc{$ccds})){ my $hgnc = $ccds_to_hgnc{$ccds}; $hgnc =~ s/HGNC://; my $xref_id = $self->add_xref($hgnc, $version{$hgnc} , $label{$hgnc}||$hgnc , $description{$hgnc}, $source_id, $species_id, "DIRECT"); - $self->add_direct_xref($xref_id, $ccds_to_stable_id{$ccds}, "Transcript", ""); + + foreach my $stable_id (@{$ccds_to_stable_id{$ccds}}){ + $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $direct_count++; + } + $xref_count++; if(defined($syn_hash->{$hgnc})){ @@ -168,7 +174,7 @@ sub run_script { } } $add_syn_sth->finish; - print "$no_ccds_to_hgnc missed as no hgnc for the ccds. Added $xref_count HGNC xrefs via CCDS\n" if($verbose); + print "$no_ccds_to_hgnc missed as no hgnc for the ccds. Added $xref_count HGNC xrefs via CCDS and $direct_count direct xrefs\n" if($verbose); return 0; } diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm index 8a704ee89a1c13bf70a34db3562ca7995fc33f36..fe64316796679f674813c62d78ec1b08c55790f7 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeq_CCDSParser.pm @@ -73,7 +73,7 @@ sub run_script { my %ensembl_type; my %old_xref; while (my @row = $sth->fetchrow_array()) { - $ensembl_stable_id{$access} = $stable_id; + push @{$ensembl_stable_id{$access}}, $stable_id; $ensembl_type{$access} = $type; $old_xref{$access} = $old_xref_id; } @@ -83,9 +83,13 @@ sub run_script { my $line_count = 0; my $xref_count = 0; + my $direct_count = 0; my %seen; my %old_to_new; +# +# dbi2 is the ccds database +# my $dbi2 = $self->dbi2($host, $port, $user, $dbname, $pass); if(!defined($dbi2)){ return 1; @@ -97,8 +101,9 @@ sub run_script { # get ccds -> xref transcript_id ensembl_stable_id{CCDS1} = ENST00001 # get ccds -> internal transcript_id ccds_to_internal_id(CCDS1} = 12345 - $sql = 'select x.dbprimary_acc, ox.ensembl_id from xref x, object_xref ox, external_db e where x.xref_id = ox.xref_id and x.external_db_id = e.external_db_id and e.db_name like ?'; + $sql = 'select x.dbprimary_acc, ox.ensembl_id from xref x, object_xref ox, external_db e where x.xref_id = ox.xref_id and x.external_db_id = e.external_db_id and e.db_name like ? order by x.version'; +# order by version added so that the hash gets overwritten with the latest version. # calculate internal_id -> xref transcript_id @@ -141,7 +146,11 @@ sub run_script { my $xref_id = $self->add_xref($refseq, $version{$refseq} , $label{$refseq}||$refseq , $description{$refseq}, $new_source_id, $species_id, "DIRECT"); - $self->add_direct_xref($xref_id, $internal_to_stable_id{$internal_id}, "Transcript", ""); + + foreach my $stable_id (@{$internal_to_stable_id{$internal_id}}){ + $self->add_direct_xref($xref_id, $stable_id, "Transcript", ""); + $direct_count++; + } $old_to_new{$old_xref{$refseq}} = $xref_id; $xref_count++; @@ -166,7 +175,7 @@ sub run_script { } - print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $xref_count direct_xrefs from $line_count lines.\n" if ($verbose); + print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $direct_count direct_xrefs from $line_count lines.\n" if ($verbose); return 0;