Commit de770c57 authored by Fred Drake

Refactor to avoid profiling the setup and reporting code, and add an option to
profile with a warm database cache instead of the cold cache.
parent fb9af5c5
...@@ -74,7 +74,7 @@ class Message: ...@@ -74,7 +74,7 @@ class Message:
class Extra: class Extra:
pass pass
def index(rt, mboxfile, db): def index(rt, mboxfile, db, profiler):
global NUM global NUM
idx_time = 0 idx_time = 0
pack_time = 0 pack_time = 0
...@@ -97,6 +97,41 @@ def index(rt, mboxfile, db): ...@@ -97,6 +97,41 @@ def index(rt, mboxfile, db):
print "opened", mboxfile print "opened", mboxfile
if not NUM: if not NUM:
NUM = sys.maxint NUM = sys.maxint
if profiler:
itime, ptime, i = profiler.runcall(indexmbox, mbox, idx, docs, db)
else:
itime, ptime, i = indexmbox(mbox, idx, docs, db)
idx_time += itime
pack_time += ptime
get_transaction().commit()
if PACK_INTERVAL and i % PACK_INTERVAL != 0:
if VERBOSE >= 2:
print "packing one last time..."
p0 = time.clock()
db.pack(time.time())
p1 = time.clock()
if VERBOSE:
print "pack took %s sec" % (p1 - p0)
pack_time += p1 - p0
if VERBOSE:
finish_time = time.time()
print
print "Index time", round(idx_time / 60, 3), "minutes"
print "Pack time", round(pack_time / 60, 3), "minutes"
print "Index bytes", Message.total_bytes
rate = (Message.total_bytes / idx_time) / 1024
print "Index rate %.2f KB/sec" % rate
print "Indexing began", time.ctime(start_time)
print "Indexing ended", time.ctime(finish_time)
print "Wall clock minutes", round((finish_time - start_time)/60, 3)
def indexmbox(mbox, idx, docs, db):
idx_time = 0
pack_time = 0
i = 0 i = 0
while i < NUM: while i < NUM:
_msg = mbox.next() _msg = mbox.next()
...@@ -126,37 +161,22 @@ def index(rt, mboxfile, db): ...@@ -126,37 +161,22 @@ def index(rt, mboxfile, db):
if VERBOSE: if VERBOSE:
print "pack took %s sec" % (p1 - p0) print "pack took %s sec" % (p1 - p0)
pack_time += p1 - p0 pack_time += p1 - p0
return idx_time, pack_time, i
get_transaction().commit()
if PACK_INTERVAL and i % PACK_INTERVAL != 0:
if VERBOSE >= 2:
print "packing one last time..."
p0 = time.clock()
db.pack(time.time())
p1 = time.clock()
if VERBOSE:
print "pack took %s sec" % (p1 - p0)
pack_time += p1 - p0
if VERBOSE: def query(rt, query_str, profiler):
finish_time = time.time()
print
print "Index time", round(idx_time / 60, 3), "minutes"
print "Pack time", round(pack_time / 60, 3), "minutes"
print "Index bytes", Message.total_bytes
rate = (Message.total_bytes / idx_time) / 1024
print "Index rate %.2f KB/sec" % rate
print "Indexing began", time.ctime(start_time)
print "Indexing ended", time.ctime(finish_time)
print "Wall clock minutes", round((finish_time - start_time)/60, 3)
def query(rt, query_str):
idx = rt["index"] idx = rt["index"]
docs = rt["documents"] docs = rt["documents"]
start = time.clock() start = time.clock()
results, num_results = idx.query(query_str, BEST) if profiler is None:
results, num_results = idx.query(query_str, BEST)
else:
if WARM_CACHE:
print "Warming the cache..."
idx.query(query_str, BEST)
start = time.clock()
results, num_results = profiler.runcall(idx.query, query_str, BEST)
elapsed = time.clock() - start elapsed = time.clock() - start
print "query:", query_str print "query:", query_str
...@@ -180,16 +200,16 @@ def query(rt, query_str): ...@@ -180,16 +200,16 @@ def query(rt, query_str):
print "-" * 60 print "-" * 60
def main(fs_path, mbox_path, query_str): def main(fs_path, mbox_path, query_str, profiler):
f = ZODB.FileStorage.FileStorage(fs_path) f = ZODB.FileStorage.FileStorage(fs_path)
db = ZODB.DB(f, cache_size=CACHE_SIZE) db = ZODB.DB(f, cache_size=CACHE_SIZE)
cn = db.open() cn = db.open()
rt = cn.root() rt = cn.root()
if mbox_path is not None: if mbox_path is not None:
index(rt, mbox_path, db) index(rt, mbox_path, db, profiler)
if query_str is not None: if query_str is not None:
query(rt, query_str) query(rt, query_str, profiler)
cn.close() cn.close()
db.close() db.close()
...@@ -206,12 +226,13 @@ if __name__ == "__main__": ...@@ -206,12 +226,13 @@ if __name__ == "__main__":
TXN_SIZE = 1 TXN_SIZE = 1
BEST = 10 BEST = 10
CONTEXT = 5 CONTEXT = 5
WARM_CACHE = 0
query_str = None query_str = None
mbox_path = None mbox_path = None
profile = None profile = None
old_profile = None old_profile = None
try: try:
opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:c:xt:', opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:c:xt:w',
['profile=', 'old-profile=']) ['profile=', 'old-profile='])
except getopt.error, msg: except getopt.error, msg:
usage(msg) usage(msg)
...@@ -235,23 +256,30 @@ if __name__ == "__main__": ...@@ -235,23 +256,30 @@ if __name__ == "__main__":
elif o == '-t': elif o == '-t':
TXN_SIZE = int(v) TXN_SIZE = int(v)
elif o == '-c': elif o == '-c':
CONTEXT = int(v) CONTEXT = int(v)
elif o == '-w':
WARM_CACHE = 1
elif o == '--profile': elif o == '--profile':
profile = v profile = v
elif o == '--old-profile': elif o == '--old-profile':
old_profile = v old_profile = v
fs_path, = args fs_path, = args
if profile: if profile:
import hotshot import hotshot
profiler = hotshot.Profile(profile, lineevents=1, linetimings=1) profiler = hotshot.Profile(profile, lineevents=1, linetimings=1)
profiler.runcall(main, fs_path, mbox_path, query_str)
profiler.close()
elif old_profile: elif old_profile:
import profile, pstats import profile
profiler = profile.Profile() profiler = profile.Profile()
profiler.runcall(main, fs_path, mbox_path, query_str) else:
profiler = None
main(fs_path, mbox_path, query_str, profiler)
if profile:
profiler.close()
elif old_profile:
import pstats
profiler.dump_stats(old_profile) profiler.dump_stats(old_profile)
stats = pstats.Stats(old_profile) stats = pstats.Stats(old_profile)
stats.strip_dirs().sort_stats('time').print_stats(20) stats.strip_dirs().sort_stats('time').print_stats(20)
else:
main(fs_path, mbox_path, query_str)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment