From f75eb99c600eeba81f307b8bf852e194adcc688f Mon Sep 17 00:00:00 2001
From: Kazuhiko Shiozaki <kazuhiko@nexedi.com>
Date: Thu, 14 Mar 2013 14:37:00 +0100
Subject: [PATCH] initial copy of ZODB3-3.9.7's ZODB/scripts/analyze.py.

---
 erp5/util/zodbanalyze/__init__.py | 143 ++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 erp5/util/zodbanalyze/__init__.py

diff --git a/erp5/util/zodbanalyze/__init__.py b/erp5/util/zodbanalyze/__init__.py
new file mode 100644
index 0000000000..f2f1443d06
--- /dev/null
+++ b/erp5/util/zodbanalyze/__init__.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python2.4
+
+# Based on a transaction analyzer by Matt Kromer.
+
+import pickle
+import re
+import sys
+import types
+from ZODB.FileStorage import FileStorage
+from cStringIO import StringIO
+
+class FakeError(Exception):
+    def __init__(self, module, name):
+        Exception.__init__(self)
+        self.module = module
+        self.name = name
+
+class FakeUnpickler(pickle.Unpickler):
+    def find_class(self, module, name):
+        raise FakeError(module, name)
+
+class Report:
+    def __init__(self):
+        self.OIDMAP = {}
+        self.TYPEMAP = {}
+        self.TYPESIZE = {}
+        self.FREEMAP = {}
+        self.USEDMAP = {}
+        self.TIDS = 0
+        self.OIDS = 0
+        self.DBYTES = 0
+        self.COIDS = 0
+        self.CBYTES = 0
+        self.FOIDS = 0
+        self.FBYTES = 0
+
+def shorten(s, n):
+    l = len(s)
+    if l <= n:
+        return s
+    while len(s) + 3 > n: # account for ...
+        i = s.find(".")
+        if i == -1:
+            # In the worst case, just return the rightmost n bytes
+            return s[-n:]
+        else:
+            s = s[i + 1:]
+            l = len(s)
+    return "..." + s
+
+def report(rep):
+    print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
+    print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
+    print ("Average transaction size is %7.2f bytes" %
+           (rep.DBYTES * 1.0 / rep.TIDS))
+
+    print "Types used:"
+    fmt = "%-46s %7s %9s %6s %7s"
+    fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
+    fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
+    print fmt % ("Class Name", "Count", "TBytes", "Pct", "AvgSize")
+    print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
+    typemap = rep.TYPEMAP.keys()
+    typemap.sort()
+    cumpct = 0.0
+    for t in typemap:
+        pct = rep.TYPESIZE[t] * 100.0 / rep.DBYTES
+        cumpct += pct
+        print fmtp % (shorten(t, 46), rep.TYPEMAP[t], rep.TYPESIZE[t],
+                      pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])
+
+    print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
+    print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
+        ' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
+    print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
+                  rep.DBYTES * 1.0 / rep.OIDS)
+
+    print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
+                  rep.CBYTES * 100.0 / rep.DBYTES,
+                  rep.CBYTES * 1.0 / rep.COIDS)
+    if rep.FOIDS:
+        print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
+                      rep.FBYTES * 100.0 / rep.DBYTES,
+                      rep.FBYTES * 1.0 / rep.FOIDS)
+
+def analyze(path):
+    fs = FileStorage(path, read_only=1)
+    fsi = fs.iterator()
+    report = Report()
+    for txn in fsi:
+        analyze_trans(report, txn)
+    return report
+
+def analyze_trans(report, txn):
+    report.TIDS += 1
+    for rec in txn:
+        analyze_rec(report, rec)
+
+def get_type(record):
+    try:
+        unpickled = FakeUnpickler(StringIO(record.data)).load()
+    except FakeError, err:
+        return "%s.%s" % (err.module, err.name)
+    except:
+        raise
+    classinfo = unpickled[0]
+    if isinstance(classinfo, types.TupleType):
+        mod, klass = classinfo
+        return "%s.%s" % (mod, klass)
+    else:
+        return str(classinfo)
+
+def analyze_rec(report, record):
+    oid = record.oid
+    report.OIDS += 1
+    if record.data is None:
+        # No pickle -- aborted version or undo of object creation.
+        return
+    try:
+        size = len(record.data) # Ignores various overhead
+        report.DBYTES += size
+        if oid not in report.OIDMAP:
+            type = get_type(record)
+            report.OIDMAP[oid] = type
+            report.USEDMAP[oid] = size
+            report.COIDS += 1
+            report.CBYTES += size
+        else:
+            type = report.OIDMAP[oid]
+            fsize = report.USEDMAP[oid]
+            report.FREEMAP[oid] = report.FREEMAP.get(oid, 0) + fsize
+            report.USEDMAP[oid] = size
+            report.FOIDS += 1
+            report.FBYTES += fsize
+            report.CBYTES += size - fsize
+        report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
+        report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
+    except Exception, err:
+        print err
+
+if __name__ == "__main__":
+    path = sys.argv[1]
+    report(analyze(path))
-- 
2.30.9