Commit 1e506a81 authored by Kazuhiko Shiozaki, committed by Kirill Smelkov

zodbanalyze: now supports both FileStorage and repozo deltafs

/reviewed-on !1
/see-also slapos!116
parent ab17cf2d
......@@ -8,5 +8,6 @@ scripts anymore. So we are here:
- `zodbanalyze` - analyze FileStorage or repozo deltafs usage.
- `zodbcmp` - compare content of two ZODB databases bit-to-bit.
- `zodbdump` - dump content of a ZODB database.
......@@ -26,6 +26,7 @@ setup(
# zodb cmd ...
# zodb dump ...
entry_points= {'console_scripts': [
'zodbanalyze = zodbtool.zodbanalyze:main',
'zodbcmp = zodbtool.zodbcmp:main',
'zodbdump = zodbtool.zodbdump:main',
#!/usr/bin/env python2.4
#!/usr/bin/env python
# Based on a transaction analyzer by Matt Kromer.
......@@ -8,12 +8,43 @@ import getopt
import anydbm as dbm
import tempfile
import shutil
from ZODB.FileStorage import FileStorage
from ZODB.FileStorage import FileIterator, FileStorage, packed_version
from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata
class DeltaFileStorage(
def __init__(self, file_name, **kw):
self._file_name = file_name
def iterator(self, start=None, stop=None):
return DeltaFileIterator(self._file_name, start, stop)
class DeltaFileIterator(FileIterator):
def __init__(self, filename, start=None, stop=None, pos=0L):
assert isinstance(filename, str)
file = open(filename, 'rb')
self._file = file,2)
self._file_size = file.tell()
if pos > self._file_size:
raise ValueError("Given position is greater than the file size",
pos, self._file_size)
self._pos = pos
assert start is None or isinstance(start, str)
assert stop is None or isinstance(stop, str)
self._start = start
self._stop = stop
if start:
if self._file_size <= 4:
class Report:
def __init__(self, use_dbm=False):
def __init__(self, use_dbm=False, delta_fs=False):
self.use_dbm = use_dbm
self.delta_fs = delta_fs
if use_dbm:
self.temp_dir = tempfile.mkdtemp()
self.OIDMAP =, 'oidmap.db'),
......@@ -52,6 +83,7 @@ def shorten(s, n):
return "..." + s
def report(rep, csv=False):
delta_fs = rep.delta_fs
if not csv:
print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
......@@ -59,17 +91,28 @@ def report(rep, csv=False):
(rep.DBYTES * 1.0 / rep.TIDS))
print "Types used:"
if delta_fs:
if csv:
fmt = "%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f" # per-class format
fmt = "%-46s %7s %9s %6s %7s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
if csv:
fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s"
fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format
fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s"
fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format
fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
print fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
"C.Count", "C.Bytes", "O.Count", "O.Bytes")
if not csv:
print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9)
fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
typemap = rep.TYPEMAP.keys()
typemap.sort(key=lambda a:rep.TYPESIZE[a])
cumpct = 0.0
......@@ -80,6 +123,10 @@ def report(rep, csv=False):
t_display = t
t_display = shorten(t, 46)
if delta_fs:
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])
print fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
rep.COIDSMAP[t], rep.CBYTESMAP[t],
......@@ -88,6 +135,13 @@ def report(rep, csv=False):
if csv:
if delta_fs:
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
print "%46s %7d %9s %6s %6.2f" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS)
print fmts % ('Total Records', rep.OIDS, rep.DBYTES, cumpct,
rep.DBYTES * 1.0 / rep.OIDS)
print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9)
print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
......@@ -102,10 +156,13 @@ def report(rep, csv=False):
rep.FBYTES * 100.0 / rep.DBYTES,
rep.FBYTES * 1.0 / rep.FOIDS)
def analyze(path, use_dbm):
def analyze(path, use_dbm, delta_fs):
if delta_fs:
fs = DeltaFileStorage(path, read_only=1)
fs = FileStorage(path, read_only=1)
fsi = fs.iterator()
report = Report(use_dbm)
report = Report(use_dbm, delta_fs)
for txn in fsi:
analyze_trans(report, txn)
if use_dbm:
......@@ -130,6 +187,11 @@ def analyze_rec(report, record):
size = len( # Ignores various overhead
report.DBYTES += size
if report.delta_fs:
type = get_type(record)
report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
if oid not in report.OIDMAP:
type = get_type(record)
report.OIDMAP[oid] = type
......@@ -160,14 +222,17 @@ def analyze_rec(report, record):
except Exception, err:
print err
__doc__ = """%(program)s: Data.fs analyzer
__doc__ = """%(program)s: Analyzer for FileStorage data or repozo deltafs
usage: %(program)s [options] /path/to/Data.fs
usage: %(program)s [options] /path/to/Data.fs (or /path/to/file.deltafs)
-h, --help this help screen
-c, --csv output CSV
-d, --dbm use DBM as temporary storage to limit memory usage
(no meaning for deltafs case)
Input deltafs file should be uncompressed.
def usage(stream, msg=None):
......@@ -196,7 +261,18 @@ def main():
if opt in ('-h', '--help'):
report(analyze(path, use_dbm), csv)
header = open(path, 'rb').read(4)
if header == packed_version:
delta_fs = False
delta_fs = True
_orig_read_data_header = FileStorageFormatter._read_data_header
# Replacement for FileStorageFormatter._read_data_header: delegates to the
# saved original (_orig_read_data_header) and then overrides the transaction
# location stored in the returned data header.
# NOTE(review): presumably needed because tloc values recorded in a repozo
# deltafs refer to offsets in the full Data.fs rather than in the delta
# file -- confirm against the ZODB FileStorage format.
def _read_data_header(self, pos, oid=None):
# Read the header exactly as the unpatched formatter would.
h = _orig_read_data_header(self, pos, oid=oid)
# Force tloc to the position held in self._tpos instead of the value read
# from the file.
h.tloc = self._tpos
return h
FileStorageFormatter._read_data_header = _read_data_header
report(analyze(path, use_dbm, delta_fs), csv)
if __name__ == "__main__":
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment