Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
ZODB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Kirill Smelkov
ZODB
Commits
b80f50c8
Commit
b80f50c8
authored
Nov 14, 2003
by
Jeremy Hylton
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add a quick hack that reports summary statistics from fsdump.
parent
1a0f03bf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
198 additions
and
0 deletions
+198
-0
trunk/src/scripts/fsstats.py
trunk/src/scripts/fsstats.py
+198
-0
No files found.
trunk/src/scripts/fsstats.py
0 → 100755
View file @
b80f50c8
#!python
"""Print details statistics from fsdump output."""
import
re
import
sys
rx_txn
=
re
.
compile
(
"tid=([0-9a-f]+).*size=(
\
d+)
"
)
rx_data = re.compile("
oid
=
([
0
-
9
a
-
f
]
+
)
class
=
(
\
S
+
)
size
=
(
\
d
+
)
")
def sort_byhsize(seq, reverse=False):
L = [(v.size(), k, v) for k, v in seq]
L.sort()
if reverse:
L.reverse()
return [(k, v) for n, k, v in L]
class Histogram(dict):
def add(self, size):
self[size] = self.get(size, 0) + 1
def size(self):
return sum(self.itervalues())
def mean(self):
product = sum([k * v for k, v in self.iteritems()])
return product / self.size()
def median(self):
# close enough?
n = self.size() / 2
L = self.keys()
L.sort()
L.reverse()
while 1:
k = L.pop()
if self[k] > n:
return k
n -= self[k]
def mode(self):
mode = 0
value = 0
for k, v in self.iteritems():
if v > value:
value = v
mode = k
return mode
def make_bins(self, binsize):
maxkey = max(self.iterkeys())
self.binsize = binsize
self.bins = [0] * (1 + maxkey / binsize)
for k, v in self.iteritems():
b = k / binsize
self.bins[b] += v
def report(self, name, binsize=50, usebins=False, gaps=True, skip=True):
if usebins:
# Use existing bins with whatever size they have
binsize = self.binsize
else:
# Make new bins
self.make_bins(binsize)
maxval = max(self.bins)
# Print up to 40 dots for a value
dot = max(maxval / 40, 1)
tot = sum(self.bins)
print name
print "
Total
", tot,
print "
Median
", self.median(),
print "
Mean
", self.mean(),
print "
Mode
", self.mode(),
print "
Max
", max(self)
print "
One
*
represents
", dot
gap = False
cum = 0
for i, n in enumerate(self.bins):
if gaps and (not n or (skip and not n / dot)):
if not gap:
print "
...
"
gap = True
continue
gap = False
p = 100 * n / tot
cum += n
pc = 100 * cum / tot
print "
%
6
d
%
6
d
%
3
d
%%
%
3
d
%%
%
s
" % (
i * binsize, n, p, pc, "
*
" * (n / dot))
print
def class_detail(class_size):
# summary of classes
fmt = "
%
5
s
%
6
s
%
6
s
%
6
s
%-
50.50
s
"
labels = ["
num
", "
median
", "
mean
", "
mode
", "
class
"]
print fmt % tuple(labels)
print fmt % tuple(["
-
" * len(s) for s in labels])
for klass, h in sort_byhsize(class_size.iteritems()):
print fmt % (h.size(), h.median(), h.mean(), h.mode(), klass)
print
# per class details
for klass, h in sort_byhsize(class_size.iteritems(), reverse=True):
h.make_bins(50)
if len(filter(None, h.bins)) == 1:
continue
h.report("
Object
size
for
%
s
" % klass, usebins=True)
def revision_detail(lifetimes, classes):
# Report per-class details for any object modified more than once
for name, oids in classes.iteritems():
h = Histogram()
keep = False
for oid in dict.fromkeys(oids, 1):
L = lifetimes.get(oid)
n = len(L)
h.add(n)
if n > 1:
keep = True
if keep:
h.report("
Number
of
revisions
for
%
s
" % name, binsize=10)
def main(path):
txn_objects = Histogram() # histogram of txn size in objects
txn_bytes = Histogram() # histogram of txn size in bytes
obj_size = Histogram() # histogram of object size
n_updates = Histogram() # oid -> num updates
n_classes = Histogram() # class -> num objects
lifetimes = {} # oid -> list of tids
class_size = {} # class -> histogram of object size
classes = {} # class -> list of oids
MAX = 0
tid = None
f = open(path, "rb")
for i, line in enumerate(f):
if MAX and i > MAX:
break
if line.startswith("
data
"):
m = rx_data.search(line)
if not m:
continue
oid, klass, size = m.groups()
size = int(size)
obj_size.add(size)
n_updates.add(oid)
n_classes.add(klass)
h = class_size.get(klass)
if h is None:
h = class_size[klass] = Histogram()
h.add(size)
L = lifetimes.setdefault(oid, [])
L.append(tid)
L = classes.setdefault(klass, [])
L.append(oid)
objects += 1
elif line.startswith("
Trans
"):
if tid is not None:
txn_objects.add(objects)
m = rx_txn.search(line)
if not m:
continue
tid, size = m.groups()
size = int(size)
objects = 0
txn_bytes.add(size)
f.close()
print "
Summary
:
%
d
txns
,
%
d
objects
,
%
d
revisions
" % (
txn_objects.size(), len(n_updates), n_updates.size())
print
txn_bytes.report("
Transaction
size
(
bytes
)
", binsize=1024)
txn_objects.report("
Transaction
size
(
objects
)
", binsize=10)
obj_size.report("
Object
size
", binsize=128)
# object lifetime info
h = Histogram()
for k, v in lifetimes.items():
h.add(len(v))
h.report("
Number
of
revisions
", binsize=10, skip=False)
# details about revisions
revision_detail(lifetimes, classes)
class_detail(class_size)
if __name__ == "
__main__
":
main(sys.argv[1])
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment