mass communicating

Ensembl databases

Importing many ensembl databases at the same time.

Ensembl Databases

You can download Ensembl’s MySQL databases in one go from Here is a Perl script to import them all into a MySQL database based on

WARNING: THIS SCRIPT DROPS THE DATABASES IT CREATES BEFORE INSTALLING. Read the script before running it. If it does something unintended, it's your own fault.

1) Running the script to test what it will import:

cd {path_where_you_downloaded_the_databases_to}
perl -u {mysql user name} -p {mysql password} [-d {name regex filter}]

2) Running the import:

cd {path_where_you_downloaded_the_databases_to}
perl -u {mysql user name} -p {mysql password} [-d {name regex filter}] -x

The Script

Here it is:

use strict;

use Getopt::Std;

use File::Glob ':glob';
use File::Spec qw(catfile);
use File::Basename;
use File::Temp qw(tempfile);

my %opts;

getopts('u:p:d:x', \%opts);

my $mysql_user = $opts{u} || 'root';
my $mysql_pass = $opts{p} ||'';

my $d = $opts{d};
my $execute = $opts{x};

my @files = <*>;

foreach (@files) {
    if (-d) {
        my $dir = $_;
        my $qdir = basename ($dir);

        if (defined ($d)) {
            next if !($qdir =~ /$d/);

        my $f = File::Spec->catfile($dir, $qdir . ".sql.gz");

        if (-f $f) {
            print "Adding DB $qdir (via $f)\n";

            if ($execute) {
                my ($fh, $filename) = tempfile();

                print $fh <<END;
                close $fh;

                print `mysql --user=$mysql_user --password=$mysql_pass < $filename`;
                print `gunzip -c $f | mysql --user=$mysql_user --password=$mysql_pass $qdir`;

                my $txtfiles = my $f = File::Spec->catfile($dir, "*.txt.gz");
                my @tf = bsd_glob ($txtfiles);

                foreach my $txt (@tf) {
                    print "Adding data from $txt\n";
                    `gunzip $txt`;
                    $txt=~ s/\.gz$//;
                    print `mysqlimport --user=$mysql_user --password=$mysql_pass --fields_escaped_by=\\\\ -L $qdir $txt`;
                    `gzip $txt`;