File: //bin/splitdump.py
#!/usr/bin/python -W ignore::DeprecationWarning
# -*- coding: utf-8 -*-
"""
splitdump splits a mysqldump file into several files (one per table, or one per database with -D)
"""
# Script metadata; __version__ is the string handed to OptionParser for --version.
__title__ = "splitdump"
__version__ = "0.5"
__author__= "Carles Amigo"
__email__= "fr3nd at fr3nd dot net"
import gzip
import os
import re
import sys
from optparse import OptionParser
def _open_output(filename, use_gzip):
    """Open *filename* for writing text, gzip-compressed when *use_gzip* is set."""
    if not use_gzip:
        return open(filename, "w")
    # gzip.open() defaults to binary mode on Python 3, which rejects the str
    # lines read from the dump, so text mode is requested explicitly there.
    # Python 2 keeps the plain "w" mode the script always used.
    if sys.version_info[0] >= 3:
        return gzip.open(filename, "wt")
    return gzip.open(filename, "w")


def _database_selected(database, included_databases, excluded_databases):
    """Return True when lines belonging to *database* must be written out."""
    if database in excluded_databases:
        return False
    # Lines seen before any CREATE DATABASE statement (database is None) are
    # always kept; an empty include list means "every database".
    return (database is None
            or not included_databases
            or database in included_databases)


def split_file(file, included, excluded, included_databases, excluded_databases, directory, use_gzip, store_databases):
    """Split a mysqldump stream into one file per table or per database.

    Everything read before the first split point is written to
    ``<directory>/00_header<ext>``, where ``<ext>`` is ``.sql`` or
    ``.sql.gz``.

    * Table mode (``store_databases`` false): every ``DROP TABLE IF EXISTS``
      line switches the output to ``<directory>/[<database>.]<table><ext>``.
      Tables rejected by ``included``/``excluded`` are written to the null
      device instead. Lines between a ``CREATE DATABASE`` statement and the
      next ``DROP TABLE`` keep going to whichever file is currently open.
    * Database mode (``store_databases`` true): every new ``CREATE DATABASE``
      statement switches the output to ``<directory>/<database><ext>``; the
      table filters are not consulted.

    In both modes, lines belonging to a database rejected by
    ``included_databases``/``excluded_databases`` are dropped.

    Args:
        file: iterable yielding the lines of the dump.
        included: table names to keep; empty means all tables.
        excluded: table names to discard.
        included_databases: database names to keep; empty means all.
        excluded_databases: database names to discard.
        directory: directory receiving the output files (it is not created
            here).
        use_gzip: when true, output files are gzip-compressed.
        store_databases: when true, split per database instead of per table.

    Raises:
        IOError/OSError: if an output file cannot be opened or written.
    """
    extension = ".sql.gz" if use_gzip else ".sql"
    drop_table_pat = re.compile(r'^DROP TABLE IF EXISTS `?(.*)`;')
    create_db_pat = re.compile(r'^CREATE DATABASE .+`(.*)` .+;')

    previous_database = None
    database = None
    output = _open_output(directory + "/00_header" + extension, use_gzip)
    # try/finally guarantees the file currently open is closed even when
    # reading the dump or writing a line fails midway.
    try:
        for line in file:
            matchdb = create_db_pat.search(line)
            if matchdb:
                previous_database = database
                database = matchdb.group(1)

            if not _database_selected(database, included_databases, excluded_databases):
                continue

            if store_databases:
                if previous_database != database:
                    # Remember the switch so the file is opened only once
                    # per database.
                    previous_database = database
                    if database:
                        output.close()
                        output = _open_output(directory + "/" + database + extension, use_gzip)
            else:
                match = drop_table_pat.search(line)
                if match:
                    table_name = match.group(1)
                    if table_name not in excluded and (not included or table_name in included):
                        filename = directory + "/"
                        if database:
                            filename = filename + database + "."
                        filename = filename + table_name + extension
                    else:
                        # Filtered-out tables are still consumed, but their
                        # lines are discarded via the null device.
                        filename = os.devnull
                    output.close()
                    output = _open_output(filename, use_gzip)

            output.write(line)
    finally:
        output.close()
def main():
    """Parse the command-line options and split the requested dump.

    The dump is read from the file given with -f/--file, or from stdin when
    no file is given and stdin is not a terminal. A missing or unreadable
    input is reported through ``parser.error``, which prints the usage text
    and exits.
    """
    # The default mode writes one file per table; -D switches to one file
    # per database, so the usage text mentions both.
    usage = "usage: %prog [options]\n Split a mysqldumpfile in multiple files (one per table, or one per database with -D).\n Use --help to view options"
    parser = OptionParser(usage, version=__version__)
    parser.add_option("-f", "--file", action="store", dest="file", type="string", default=None, help="Input file. Defaults to stdin")
    parser.add_option("-i", "--include_table", action="append", dest="included_tables", default=[], help="Tables to be included. By default, all tables are dumped. Can be called more than once.")
    parser.add_option("-e", "--exclude_table", action="append", dest="excluded_tables", default=[], help="Tables to be excluded. By default, no table is excluded. Can be called more than once.")
    parser.add_option("-I", "--include_database", action="append", dest="included_databases", default=[], help="Databases to be included. By default, all databases are dumped. Can be called more than once.")
    parser.add_option("-E", "--exclude_database", action="append", dest="excluded_databases", default=[], help="Databases to be excluded. By default, no database is excluded. Can be called more than once.")
    parser.add_option("-D", "--databases", action="store_true", dest="store_databases", default=False, help="Store databases instead of tables in individual files.")
    parser.add_option("-d", "--directory", action="store", dest="directory", type="string", default=".", help="Output directory where to store dump files.")
    parser.add_option("-g", "--gzip", action="store_true", dest="gzip", default=False, help="Compress (.gz) output files.")
    options, _args = parser.parse_args()

    if options.file:
        try:
            dump = open(options.file, "r")
        except IOError as err:
            # Only I/O failures are expected here; include the reason so the
            # user can tell a missing file from a permission problem.
            parser.error("Error opening file: %s" % err)
    elif not sys.stdin.isatty():
        dump = sys.stdin
    else:
        parser.error("No input file specified")

    try:
        split_file(dump, options.included_tables, options.excluded_tables, options.included_databases, options.excluded_databases, options.directory, options.gzip, options.store_databases)
    finally:
        # stdin belongs to the process; only close a file opened here.
        if dump is not sys.stdin:
            dump.close()
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()