Commit 1cd902d0 authored by Ian Longden's avatar Ian Longden
Browse files

Use registry to get vega and core databases. Plus now only take the core ->...

Use registry to get vega and core databases. Plus now only take the core -> vega mapping and not vice versa (to be consistent)
parent bb93cab9
......@@ -7,6 +7,8 @@ use base qw( XrefParser::BaseParser );
use strict;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Registry;
my $reg = "Bio::EnsEMBL::Registry";
#my $dbi2;
......@@ -26,23 +28,96 @@ sub run_script {
my ($type, $my_args) = split(/:/,$file);
my $vuser ="ensro";
my $vhost;
my $vport;
my $vdbname;
my $vpass;
my $cuser ="ensro";
my $chost;
my $cport;
my $cdbname;
my $cpass;
my $host = "ens-staging2";
my $user = "ensro";
my $host ="ens-staging";
my $port = "3306";
my $dbname = "";
my $pass;
if($my_args =~ /host[=][>](\S+?)[,]/){
$host = $1;
}
if($my_args =~ /port[=][>](\S+?)[,]/){
$port = $1;
if($my_args =~ /user[=][>](\S+?)[,]/){
$user = $1;
}
if($my_args =~ /vhost[=][>](\S+?)[,]/){
$vhost = $1;
}
if($my_args =~ /vport[=][>](\S+?)[,]/){
$vport = $1;
}
if($my_args =~ /vdbname[=][>](\S+?)[,]/){
$vdbname = $1;
}
if($my_args =~ /vpass[=][>](\S+?)[,]/){
$vpass = $1;
}
if($my_args =~ /vuser[=][>](\S+?)[,]/){
$vuser = $1;
}
if($my_args =~ /chost[=][>](\S+?)[,]/){
$chost = $1;
}
if($my_args =~ /cport[=][>](\S+?)[,]/){
$cport = $1;
}
if($my_args =~ /cdbname[=][>](\S+?)[,]/){
$cdbname = $1;
}
if($my_args =~ /cpass[=][>](\S+?)[,]/){
$cpass = $1;
}
if($my_args =~ /dbname[=][>](\S+?)[,]/){
$dbname = $1;
if($my_args =~ /cuser[=][>](\S+?)[,]/){
$cuser = $1;
}
if($my_args =~ /pass[=][>](\S+?)[,]/){
$pass = $1;
my $vega_dbc;
my $core_dbc;
if(defined($vdbname)){
print "Using $host $vdbname for Vega and cdbname for Core\n";
$vega_dbc = $self->dbi2($vhost, $vport, $vuser, $vdbname, $vpass);
if(!defined($vega_dbc)){
print "Problem could not open connectipn to $vhost, $vport, $vuser, $vdbname, $vpass\n";
return 1;
}
$core_dbc = $self->dbi2($chost, $cport, $cuser, $cdbname, $cpass);
if(!defined($core_dbc)){
print "Problem could not open connectipn to $chost, $cport, $cuser, $cdbname, $cpass\n";
return 1;
}
}
else{
$reg->load_registry_from_db(
-host => $host,
-user => $user);
$vega_dbc = $reg->get_adaptor("mouse","vega","slice");
if(!defined($vega_dbc)){
print "Could not connect to mouse vega database using load_registry_from_db $host $user\n";
return 1;
}
$vega_dbc = $vega_dbc->dbc;
$core_dbc = $reg->get_adaptor("mouse","core","slice");
if(!defined($core_dbc)){
print "Could not connect to mouse core database using load_registry_from_db $host $user\n";
return 1;
}
$core_dbc= $core_dbc->dbc;
}
$source_id = XrefParser::BaseParser->get_source_id_for_source_name("MGI","vega");
......@@ -51,41 +126,30 @@ sub run_script {
my $sql = 'select tsi.stable_id, x.display_label from xref x, object_xref ox , transcript_stable_id tsi, external_db e where e.external_db_id = x.external_db_id and x.xref_id = ox.xref_id and tsi.transcript_id = ox.ensembl_id and e.db_name like ?';
my $hgnc_sql = 'select tsi.stable_id, x.dbprimary_acc from xref x, object_xref ox , transcript_stable_id tsi, gene g, external_db e, transcript t where t.gene_id = g.gene_id and g.gene_id = ox.ensembl_id and tsi.transcript_id = t.transcript_id and e.external_db_id = x.external_db_id and x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and e.db_name like "MGI"';
my $name_sql = 'select tsi.stable_id, x.dbprimary_acc from xref x, object_xref ox , transcript_stable_id tsi, gene g, external_db e, transcript t where t.gene_id = g.gene_id and g.gene_id = ox.ensembl_id and tsi.transcript_id = t.transcript_id and e.external_db_id = x.external_db_id and x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and e.db_name like "MGI"';
my %ott_to_hgnc;
my %ott_to_mgi;
my %ott_to_enst;
my $dbi2 = $self->dbi2($host, $port, $user, $dbname, $pass);
if(!defined($dbi2)){
return 1;
}
my $sth = $dbi2->prepare($sql); # funny number instead of stable id ?????
$sth->execute("ENST_CDS") or croak( $dbi2->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[0]} = $row[1];
}
$sth->finish;
$sth = $dbi2->prepare($sql);
$sth->execute("ENST_ident") or croak( $dbi2->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[0]} = $row[1];
my $sth = $core_dbc->prepare($sql) || die "Could not prepare for core $sql\n";
foreach my $external_db (qw(OTTT shares_CDS_and_UTR_with_OTTT shares_CDS_with_ENST)){
$sth->execute($external_db) or croak( $core_dbc->errstr());
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[1]} = $row[0];
}
}
$sth->finish;
print "We have ".scalar(%ott_to_enst)." ott to enst entries\n " if($verbose);
$sth = $dbi2->prepare($hgnc_sql);
$sth->execute() or croak( $dbi2->errstr() );
$sth = $vega_dbc->prepare($name_sql) || die "Could not prepare for vega $sql\n";;
$sth->execute() or croak( $vega_dbc->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_hgnc{$row[0]} = $row[1];
$ott_to_mgi{$row[0]} = $row[1];
}
$sth->finish;
print "We have ".scalar(%ott_to_hgnc)." ott to mgi entries\n" if($verbose);
print "We have ".scalar(%ott_to_mgi)." ott to mgi entries\n" if($verbose);
my $line_count = 0;
......@@ -140,7 +204,7 @@ sub run_script {
}
$sth->finish;
if($hgnc_loaded_count == 0){
die "No point continuing no hgncs there\n";
die "No point continuing no MGIs there\n";
}
......@@ -148,11 +212,10 @@ sub run_script {
my $ignore_examples ="";
my %acc;
foreach my $key ( keys %ott_to_hgnc){
foreach my $key ( keys %ott_to_mgi){
if(defined($ott_to_enst{$key} )){
my $hgnc = $ott_to_hgnc{$key};
# $hgnc =~ s/HGNC://;
my $hgnc = $ott_to_mgi{$key};
my $stable_id = $ott_to_enst{$key};
if(!defined($label{$hgnc})){
......
......@@ -2,15 +2,11 @@ package XrefParser::MGI_curated_transcriptParser;
use strict;
use File::Basename;
use Bio::EnsEMBL::Registry;
use base qw( XrefParser::BaseParser );
use strict;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
my $reg = "Bio::EnsEMBL::Registry";
#my $dbi2;
if (!defined(caller())) {
if (scalar(@ARGV) != 1) {
......@@ -31,31 +27,97 @@ sub run_script {
my ($type, $my_args) = split(/:/,$file);
my $user ="ensro";
my $host;
my $port;
my $dbname;
my $pass;
my $vuser ="ensro";
my $vhost;
my $vport;
my $vdbname;
my $vpass;
my $cuser ="ensro";
my $chost;
my $cport;
my $cdbname;
my $cpass;
my $host = "ens-staging2";
my $user = "ensro";
if($my_args =~ /host[=][>](\S+?)[,]/){
$host = $1;
}
if($my_args =~ /port[=][>](\S+?)[,]/){
$port = $1;
if($my_args =~ /user[=][>](\S+?)[,]/){
$user = $1;
}
if($my_args =~ /vhost[=][>](\S+?)[,]/){
$vhost = $1;
}
if($my_args =~ /vport[=][>](\S+?)[,]/){
$vport = $1;
}
if($my_args =~ /dbname[=][>](\S+?)[,]/){
$dbname = $1;
if($my_args =~ /vdbname[=][>](\S+?)[,]/){
$vdbname = $1;
}
if($my_args =~ /pass[=][>](\S+?)[,]/){
$pass = $1;
if($my_args =~ /vpass[=][>](\S+?)[,]/){
$vpass = $1;
}
if($my_args =~ /user[=][>](\S+?)[,]/){
$user = $1;
if($my_args =~ /vuser[=][>](\S+?)[,]/){
$vuser = $1;
}
print "Using $host $dbname for Vega\n";
if($my_args =~ /chost[=][>](\S+?)[,]/){
$chost = $1;
}
if($my_args =~ /cport[=][>](\S+?)[,]/){
$cport = $1;
}
if($my_args =~ /cdbname[=][>](\S+?)[,]/){
$cdbname = $1;
}
if($my_args =~ /cpass[=][>](\S+?)[,]/){
$cpass = $1;
}
if($my_args =~ /cuser[=][>](\S+?)[,]/){
$cuser = $1;
}
my $vega_dbc;
my $core_dbc;
if(defined($vdbname)){
print "Using $host $vdbname for Vega and cdbname for Core\n";
$vega_dbc = $self->dbi2($vhost, $vport, $vuser, $vdbname, $vpass);
if(!defined($vega_dbc)){
print "Problem could not open connectipn to $vhost, $vport, $vuser, $vdbname, $vpass\n";
return 1;
}
$core_dbc = $self->dbi2($chost, $cport, $cuser, $cdbname, $cpass);
if(!defined($core_dbc)){
print "Problem could not open connectipn to $chost, $cport, $cuser, $cdbname, $cpass\n";
return 1;
}
}
else{
$reg->load_registry_from_db(
-host => $host,
-user => $user);
$vega_dbc = $reg->get_adaptor("mouse","vega","slice");
if(!defined($vega_dbc)){
print "Could not connect to mouse vega database using load_registry_from_db $host $user\n";
return 1;
}
$vega_dbc = $vega_dbc->dbc;
$core_dbc = $reg->get_adaptor("mouse","core","slice");
if(!defined($core_dbc)){
print "Could not connect to mouse core database using load_registry_from_db $host $user\n";
return 1;
}
$core_dbc= $core_dbc->dbc;
}
my $clone_source_id =
$self->get_source_id_for_source_name('Clone_based_vega_transcript');
my $curated_source_id =
......@@ -66,35 +128,26 @@ sub run_script {
my %ott_to_vega_name;
my %ott_to_enst;
my $dbi2 = $self->dbi2($host, $port, $user, $dbname, $pass);
if(!defined($dbi2)){
return 1;
}
my $sth = $dbi2->prepare($sql); # funny number instead of stable id ?????
$sth->execute("Vega_transcript") or croak( $dbi2->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_vega_name{$row[0]} = $row[1];
}
$sth->finish;
$sth = $dbi2->prepare($sql); # funny number instead of stable id ?????
$sth->execute("ENST_CDS") or croak( $dbi2->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[0]} = $row[1];
my $sth = $core_dbc->prepare($sql) || die "Could not prepare for core $sql\n";
foreach my $external_db (qw(OTTT shares_CDS_and_UTR_with_OTTT shares_CDS_with_ENST)){
$sth->execute($external_db) or croak( $core_dbc->errstr());
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[1]} = $row[0];
}
}
$sth->finish;
$sth = $dbi2->prepare($sql);
$sth->execute("ENST_ident") or croak( $dbi2->errstr() );
$sth = $vega_dbc->prepare($sql) || die "Could not prepare for vega $sql\n";
$sth->execute("Vega_transcript") or croak( $vega_dbc->errstr() );
while ( my @row = $sth->fetchrow_array() ) {
$ott_to_enst{$row[0]} = $row[1];
next if ($row[1] =~ /^OTT/);
$ott_to_vega_name{$row[0]} = $row[1];
}
$sth->finish;
my $xref_count = 0;
foreach my $ott (keys %ott_to_enst){
......@@ -104,7 +157,7 @@ sub run_script {
$name =~ s/WU://;
if($name =~ /[.]/){
$id = $clone_source_id;
$name =~ s/[.]\d+//; #remove .number
# keep it apparently $name =~ s/[.]\d+//; #remove .number
}
my $xref_id = $self->add_xref($name, "" , $name , "", $id, $species_id, "DIRECT");
$xref_count++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment