Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
ac81f789
Commit
ac81f789
authored
Jul 31, 2012
by
Andy Yates
Browse files
Adding new script for autogenerating aliases
parent
aa689741
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
314 additions
and
0 deletions
+314
-0
misc-scripts/production_database/scripts/generate_default_aliases.pl
...s/production_database/scripts/generate_default_aliases.pl
+314
-0
No files found.
misc-scripts/production_database/scripts/generate_default_aliases.pl
0 → 100755
View file @
ac81f789
#!/usr/bin/env perl
use
strict
;
use
warnings
;
use
Getopt::
Long
qw( :config no_ignore_case )
;
use
Pod::
Usage
;
use
POSIX
;
use
Bio::EnsEMBL::DBSQL::
DBConnection
;
# Script entry point, invoked as a class method.
#
# Builds the script object, parses and validates the command line options,
# looks up the requested species and then, working through them in
# production name order, computes the aliases each one is missing and hands
# them to _write_aliases().
sub run {
  my ($class) = @_;
  my $self = bless({}, $class);

  $self->args();
  $self->check_opts();

  my $species_by_id = $self->_species();
  my @ordered = sort { $a->{production} cmp $b->{production} } values %{$species_by_id};

  for my $record (@ordered) {
    $self->v('Processing %s', $record->{production});
    my $missing = $self->_aliases_to_add($record);
    $self->_write_aliases($missing, $record);
    $self->v('Done');
  }

  return;
}
# Parses the command line (@ARGV) into a hash of options and stores it in
# $self->{opts}.
#
# Defaults are supplied for the production database location, an empty
# species list and write => 0 (dry run). Prints usage and exits with status 1
# if option parsing fails; prints the help or man page and exits with status
# 0 when --help or --man is given.
sub args {
  my ($self) = @_;

  my $opts = {
    # Master database location:
    mhost     => 'ens-staging1',
    mport     => 3306,
    mdatabase => 'ensembl_production',
    species   => [],
    # Dry run unless --write is given
    write     => 0,
  };

  # 'write!' was missing from this list, which meant the write flag could
  # never be switched on and the script was permanently stuck in dry-run mode.
  my @cmd_opts = qw/
    mhost|mh=s
    mport|mP=i
    muser|mu=s
    mpass|mp=s
    mdatabase|md=s
    species|s=s@
    write!
    verbose|v!
    help
    man
  /;

  GetOptions($opts, @cmd_opts) or pod2usage(-verbose => 1, -exitval => 1);
  pod2usage(-verbose => 1, -exitval => 0) if $opts->{help};
  pod2usage(-verbose => 2, -exitval => 0) if $opts->{man};

  $self->{opts} = $opts;
  return;
}
# Validates the parsed options held in $self->{opts}.
#
# --mhost and --muser must each have a true value and at least one --species
# must have been supplied. The first missing option is reported through
# pod2usage() with an exit value of 1.
sub check_opts {
  my ($self) = @_;
  my $opts = $self->{opts};

  for my $required (qw/mhost muser/) {
    next if $opts->{$required};
    pod2usage(
      -msg     => "Required parameter --${required} was not given",
      -verbose => 1,
      -exitval => 1,
    );
  }

  unless (@{ $opts->{species} }) {
    pod2usage(
      -msg     => "Required parameter --species was not given",
      -verbose => 1,
      -exitval => 1,
    );
  }

  return;
}
# Inserts the given aliases for one species into species_alias.
#
# $aliases is an array reference of alias strings and $species is a species
# record hash (only its 'id' key is read here). The inserts are issued from a
# batch callback nested inside a transaction callback. When the write option
# is not set nothing is executed; each alias that would have been inserted is
# reported through v() instead.
sub _write_aliases {
  my ($self, $aliases, $species) = @_;
  my $dbc = $self->_production_dbc();
  $dbc->sql_helper()->transaction(sub {
    my $sql = 'insert into species_alias (species_id, alias, is_current, created_at) values (?,?,?, NOW())';
    my $id = $species->{id};
    $dbc->sql_helper()->batch(-SQL => $sql, -CALLBACK => sub {
      my ($sth) = @_;
      foreach my $alias (@{$aliases}) {
        if($self->{opts}->{write}) {
          # Bind values must follow the column order of the statement:
          # species_id, alias, is_current. Previously only two values were
          # bound, and with alias and species_id swapped. New aliases are
          # inserted as current (is_current = 1).
          $sth->execute($id, $alias, 1);
        }
        else {
          $self->v('Would have inserted the alias %s for species_id %d', $alias, $id);
        }
      }
      return;
    });
  });
  return;
}
# Looks up the current species matching each --species pattern.
#
# Every pattern is bound to a "production_name like ?" query. The first row
# seen for a given species_id becomes a record with the keys id, common, web,
# scientific, production and url; later rows for the same id are ignored.
# The records are then passed to _enrich() before being returned as a hash
# reference keyed by species_id.
sub _species {
  my ($self) = @_;
  my $dbc = $self->_production_dbc();
  my $helper = $dbc->sql_helper();
  my $sql = <<'SQL';
select species_id, common_name, web_name, scientific_name, production_name, url_name
from species
where production_name like ?
and is_current = 1
SQL

  my %found;
  # Record keys, in the same order as the columns selected above
  my @fields = qw/id common web scientific production url/;

  for my $pattern (@{ $self->{opts}->{species} }) {
    $self->v('Querying production for current species like %s', $pattern);
    $helper->execute_no_return(
      -SQL      => $sql,
      -PARAMS   => [$pattern],
      -CALLBACK => sub {
        my ($row) = @_;
        my $id = $row->[0];
        # A species matched by more than one pattern is only recorded once
        return if exists $found{$id};
        my %record;
        @record{@fields} = @{$row};
        $found{$id} = \%record;
        return;
      },
    );
  }

  #Enrich after executing
  $self->_enrich(\%found);
  return \%found;
}
# Adds two lookups to every species record in the given hash reference:
# 'aliases' (result of _aliases()) and 'automatic_aliases' (result of
# _automatic_aliases()). The records are modified in place.
sub _enrich {
  my ($self, $species) = @_;

  for my $record (values %{$species}) {
    $record->{aliases}           = $self->_aliases($record);
    $record->{automatic_aliases} = $self->_automatic_aliases($record);
  }

  return;
}
# Fetches the current aliases stored for a species record (read via its 'id'
# key) and returns them as a hash reference of alias => 1 for quick
# existence checks.
sub _aliases {
  my ($self, $s) = @_;
  my $dbc = $self->_production_dbc();
  my $helper = $dbc->sql_helper();

  my $current = $helper->execute_simple(
    -SQL    => 'select alias from species_alias where is_current = 1 and species_id =?',
    -PARAMS => [ $s->{id} ],
  );

  my %lookup;
  $lookup{$_} = 1 for @{$current};
  return \%lookup;
}
# Derives the default aliases for a species record from its production name
# (read from the 'production' key) and returns them as a hash reference of
# alias => 1.
#
# For homo_sapiens the aliases are: homo_sapiens, "homo sapiens", hsapiens,
# hsap and homsap. The three abbreviated forms are only generated when the
# production name matches the pattern they are derived from, so a name
# without an underscore produces only the first two forms.
sub _automatic_aliases {
  my ($self, $species) = @_;
  my $production_name = $species->{production};
  my $automatic_aliases = {};

  # *** Assume homo_sapiens ***

  #1). homo_sapiens
  $automatic_aliases->{$production_name} = 1;

  #2). homo sapiens
  my $spaced = $production_name;
  $spaced =~ tr [_] [ ];
  $automatic_aliases->{$spaced} = 1;

  # Each abbreviation is guarded by its own match. Using $1/$2 after a failed
  # match would pick up undef or captures left over from an earlier match and
  # so register a bogus alias.

  #3). hsapiens
  if($production_name =~ /^(.)[^_]*_(.*)$/) {
    $automatic_aliases->{$1.$2} = 1;
  }

  #4). hsap
  if($production_name =~ /^(.)[^_]*_(...).*$/) {
    $automatic_aliases->{$1.$2} = 1;
  }

  #5). homsap
  if($production_name =~ /^(...)[^_]*_(...).*$/) {
    $automatic_aliases->{$1.$2} = 1;
  }

  return $automatic_aliases;
}
# Works out which automatically generated aliases are not yet registered for
# a species record.
#
# Compares the record's 'automatic_aliases' lookup against its 'aliases'
# lookup, logging the decision for each one through v(), and returns an array
# reference of the aliases that still need adding. The order follows the hash
# iteration order of 'automatic_aliases'.
sub _aliases_to_add {
  my ($self, $species) = @_;
  my $registered = $species->{aliases};
  my @aliases_to_add;

  for my $candidate (keys %{ $species->{automatic_aliases} }) {
    if(! exists $registered->{$candidate}) {
      push(@aliases_to_add, $candidate);
      $self->v('%s is a new alias', $candidate);
      next;
    }
    $self->v('Skipping %s as it already registered for this species', $candidate);
  }

  return \@aliases_to_add;
}
# Returns the DBConnection for the production database described by the
# mhost, mport, mdatabase, muser and (optionally) mpass options.
#
# The connection object is created on first use and cached on $self. It is
# requested once per species by _aliases() as well as by _species() and
# _write_aliases(); without the cache each of those calls built a brand new
# DBConnection.
sub _production_dbc {
  my ($self) = @_;
  return $self->{_production_dbc} if $self->{_production_dbc};

  my $o = $self->{opts};
  my %args = (
    -HOST   => $o->{mhost},
    -PORT   => $o->{mport},
    -DBNAME => $o->{mdatabase},
    -USER   => $o->{muser}
  );
  # Only pass a password along when one was actually given
  $args{-PASS} = $o->{mpass} if $o->{mpass};

  my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(%args);
  $self->{_production_dbc} = $dbc;
  return $dbc;
}
# Verbose logger. Does nothing unless the verbose option is set; otherwise
# formats $msg with @args via sprintf and prints it, followed by a newline,
# prefixed with a [DD-MM-YYYY HH:MM:SS] local timestamp.
sub v {
  my ($self, $msg, @args) = @_;
  return unless $self->{opts}->{verbose};
  my $s_msg = sprintf($msg, @args);
  my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
  # localtime() reports the month as 0-11 and the year as an offset from
  # 1900, so both need adjusting before display. The month was previously
  # printed unadjusted, i.e. one too low.
  print sprintf("[%02d-%02d-%04d %02d:%02d:%02d] %s\n", $mday, $mon + 1, $year + 1900, $hour, $min, $sec, $s_msg);
  return;
}
# Run the script: calls run() as a class method on the current package, which
# blesses the script object and drives option parsing, species lookup and
# alias generation.
__PACKAGE__
->
run
();
__END__
=pod
=head1 NAME
generate_default_aliases.pl
=head1 SYNOPSIS
./generate_default_aliases.pl
[-mh host] -mu user [-mp password] [-mP port] \\
[-md database] \\
-s species [-s species ...] \\
[-v]
=head1 DESCRIPTION
A script used to generate a minimal set of required aliases. Assuming the
production_name I<homo_sapiens>, we would generate the following aliases:
=over 8
=item B<homo_sapiens>
=item B<homo sapiens>
=item B<hsapiens>
=item B<hsap>
=item B<homsap>
=back
It is up to the user to add more via the admin interface. This script never
removes existing aliases.
=head1 OPTIONS
=over 8
=item B<-mh|--mhost>
Host for the production database
=item B<-mP|--mport>
Port for the production database
=item B<-mu|--muser>
User for the production database
=item B<-mp|--mpass>
Password for the production database
=item B<-md|--mdatabase>
Name for the production database.
=item B<-s|--species>
Species to generate the aliases for. The value may be a SQL LIKE pattern and
the option can be given multiple times. Please use B<production_names>.
=item B<-v|--verbose>
Make the script chatty
=item B<--help>
Help message
=item B<--man>
Man page
=back
=cut
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment