Commit fd7870f4 authored by Kirill Smelkov

amari.kpi: Rework LogMeasure to prepare Measurement incrementally

We added LogMeasure in 71087f67 (amari.kpi: New package with driver for
Amarisoft LTE stack to retrieve KPI-related measurements from logs) and
its original logic is to read `stats` messages and to create Measurement
that covers [Sx, Sx+1) only after seeing Sx+1.

However in the next patch we will also need to take into account other
smaller messages besides stats, and for those messages we need the
being-prepared Measurement to already exist, so that we can amend it with
the partial data we see. So we need to rework the process to create the
Measurement that will cover [Sx, Sx+1) right after seeing Sx, without
waiting for Sx+1 to come in.

This patch does that.

Along the way it unifies how events and stats are handled. Previously
events and stats were handled via different objects and the code had many
scattered places that tried to handle cases like event-event,
event-stats, stats-event and stats-stats. And for all those cases the
intent was that we still want to emit the corresponding Measurement for
each of them, even if it carries only NA data besides timestamps. Thus it
does not make sense to split events and stats into different flows - as we can
handle all combinations by considering just one flow of "stats or
events". This simplifies logic and removes several sporadic branches
of code to emit M(ø) around events. It also discovers several places
where we were not emitting such M(ø) even though the intent was to do
so. All this is fixed now with updated tests.
parent 5bf7dc1c
# -*- coding: utf-8 -*-
# Copyright (C) 2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Copyright (C) 2022-2023 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
......@@ -46,9 +46,11 @@ class LogMeasure:
# ._rxlog IO reader for enb.xlog
# ._rlog IO reader for enb.log
#
# ._event currently handled xlog.Event | LogError | None
# ._stats currently handled xlog.Message with last read stats result | None
# ._m kpi.Measurement being prepared covering [_stats_prev, _stats) | None
# ._estats \/ last xlog.Message with read stats result
# \/ last xlog.Event | LogError
# \/ None
# ._m kpi.Measurement being prepared covering [_estats_prev, _estats) | None
# ._m_next kpi.Measurement being prepared covering [_estats, _estats_next) | None
pass
......@@ -61,9 +63,9 @@ class LogMeasure:
def __init__(logm, rxlog, rlog):
logm._rxlog = xlog.Reader(rxlog)
logm._rlog = rlog
logm._event = None
logm._stats = None
logm._estats = None
logm._m = None
logm._m_next = None
# close releases resources associated with LogMeasure and closes underlying readers.
@func(LogMeasure)
......@@ -77,6 +79,7 @@ def close(logm):
# It reads data from enb.xlog (TODO and enb.log) as needed.
@func(LogMeasure)
def read(logm): # -> kpi.Measurement | None
_trace('\n\n LogMeasure.read')
m = logm._read()
_trace(' <-', m)
return m
......@@ -99,41 +102,41 @@ def _read(logm):
#
#
# (*) see kpi.Measurement documentation for more details about init/fini correction.
m = None # kpi.Measurement to return
while 1:
_trace()
_trace('._event:\t', logm._event)
_trace('._stats:\t', logm._stats)
_trace('._m: \t', logm._m)
# flush the queue fully at an error or an event, e.g. at "service detach".
event = logm._event
if event is not None:
# <- M for [stats_prev, stats)
_trace('m: \t', m)
_trace('._m: \t', logm._m)
_trace('._estats:\t', logm._estats)
_trace('._m_next:\t', logm._m_next)
if m is not None:
return m
# flush the queue at an error or an event, e.g. at "service detach".
estats = logm._estats
if isinstance(estats, (xlog.Event, LogError)):
# <- M for [estats_prev, estats)
m = logm._m
if m is not None:
logm._m = None
return m
# <- M(ø) for [stats, event)
stats = logm._stats
if stats is not None:
logm._stats = None
if event.timestamp is not None:
m = kpi.Measurement()
m['X.Tstart'] = stats.timestamp
m['X.δT'] = event.timestamp - stats.timestamp
return m
# note ._m_next is not flushed:
# if ._m_next != None - it remains initialized with X.Tstart = estats.timestamp
# <- error|EOF
if isinstance(event, LogError):
logm._event = None
if event is LogError.EOF:
if isinstance(estats, LogError):
logm._estats = None
if estats is LogError.EOF:
return None
raise event
raise estats
# queue should be fully flushed now
assert logm._stats is None
assert logm._m is None
# event might remain non-none, e.g. "service detach", but not an error
assert isinstance(event, xlog.Event)
# queue should be flushed now up to and including estats, with the
# event remaining non-none, e.g. "service detach", but not an error
assert logm._m is None
assert isinstance(logm._estats, xlog.Event)
assert isinstance(logm._m_next, kpi.Measurement)
assert logm._m_next['X.Tstart'] == logm._estats.timestamp
# fetch next entry from xlog
......@@ -145,35 +148,49 @@ def _read(logm):
if x is None:
x = LogError.EOF # represent EOF as LogError
if isinstance(x, LogError):
logm._event = x # it is ok to forget previous event after e.g. bad line with ParseError
continue # flush the queue
elif isinstance(x, xlog.Event):
event_prev = logm._event
logm._event = x
if event_prev is None:
continue # flush
# <- M(ø) for [event_prev, event)
assert event_prev.timestamp is not None # LogErrors are raised after queue flush
m = kpi.Measurement()
m['X.Tstart'] = event_prev.timestamp
m['X.δT'] = x.timestamp - event_prev.timestamp
return m
assert isinstance(x, xlog.Message)
if x.message != "stats":
continue
# handle messages that update current Measurement
if isinstance(x, xlog.Message):
if x.message != "stats":
continue # ignore other messages
# it is an error, event or stats.
# if it is an event or stats -> finalize timestamp for _m_next.
# start building next _m_next covering [x, x_next).
# shift m <- ._m <- ._m_next <- (new Measurement | None for LogError)
# a LogError throws away preceding Measurement and does not start a new one after it
if logm._m_next is not None:
if not isinstance(x, LogError):
logm._m_next['X.δT'] = x.timestamp - logm._m_next['X.Tstart']
else:
logm._m_next = None # throw it away on seeing e.g. "stats, error"
m = logm._m
logm._m = logm._m_next
if not isinstance(x, LogError):
logm._m_next = kpi.Measurement()
logm._m_next['X.Tstart'] = x.timestamp # note X.δT remains NA until next stats|event
else:
logm._m_next = None
if isinstance(x, (xlog.Event, LogError)):
logm._estats = x # it is ok to forget previous event after e.g. bad line with ParseError
continue # flush the queue
m = logm._read_stats(x)
if m is not None:
return m
assert isinstance(x, xlog.Message)
assert x.message == "stats"
logm._handle_stats(x, m)
# NOTE _handle_stats indicates logic error in x by setting ._estats to
# LogError instead of stats. However those LogErrors come with
# timestamp and are thus treated similarly to events: we throw away
# neither ._m nor ._m_next, unlike what we do with LogErrors that
# represent errors at the log parsing level.
continue
# _read_stats handles next stats xlog entry upon _read request.
# _handle_stats handles next stats xlog entry upon _read request.
@func(LogMeasure)
def _read_stats(logm, stats: xlog.Message): # -> kpi.Measurement|None(to retry)
def _handle_stats(logm, stats: xlog.Message, m_prev: kpi.Measurement):
# build Measurement from stats' counters.
#
# we take δ(stats_prev, stat) and process it mapping Amarisoft counters to
......@@ -207,37 +224,26 @@ def _read_stats(logm, stats: xlog.Message): # -> kpi.Measurement|None(to retry)
try:
_stats_check(stats)
except LogError as e:
event_prev = logm._event
logm._event = e
if event_prev is not None:
# <- M(ø) for [event, bad_stats)
m = kpi.Measurement()
m['X.Tstart'] = event_prev.timestamp
m['X.δT'] = stats.timestamp - event_prev.timestamp
return m
return None # flush
logm._estats = e # stays M(ø) for [estats_prev, bad_stats)
return
# stats is pre-checked to be good. push it to the queue.
stats_prev = logm._stats
logm._stats = stats
# first stats after service attach -> M(ø)
if stats_prev is None:
event_prev = logm._event
if event_prev is not None:
# <- M(ø) for [event, stats)
logm._event = None
m = kpi.Measurement()
m['X.Tstart'] = event_prev.timestamp
m['X.δT'] = stats.timestamp - event_prev.timestamp
return m
return None
estats_prev = logm._estats
logm._estats = stats
# first stats after e.g. service attach -> stays M(ø) for [event_prev, stats)
if estats_prev is None:
return
if isinstance(estats_prev, (xlog.Event, LogError)):
return
# we have 2 adjacent stats. Start building new Measurement from their δ.
assert isinstance(estats_prev, xlog.Message)
assert estats_prev.message == "stats"
stats_prev = estats_prev
# we have 2 adjacent stats. Adjust corresponding Measurement from their δ.
# do init/fini correction if there was also third preceding stats message.
m = kpi.Measurement() # [stats_prev, stats)
m['X.Tstart'] = stats_prev.timestamp
m['X.δT'] = stats.timestamp - stats_prev.timestamp
m = logm._m.copy() # [stats_prev, stats)
# δcc(counter) tells how specified cumulative counter changed since last stats result.
def δcc(counter):
......@@ -250,8 +256,8 @@ def _read_stats(logm, stats: xlog.Message): # -> kpi.Measurement|None(to retry)
# m_initfini populates m[init] and m[fini] from vinit and vfini values.
# copy of previous ._m[fini] is correspondingly adjusted for init/fini correction.
p = None
if logm._m is not None:
p = logm._m.copy()
if m_prev is not None:
p = m_prev.copy()
def m_initfini(init, vinit, fini, vfini):
m[init] = vinit
m[fini] = vfini
......@@ -303,13 +309,14 @@ def _read_stats(logm, stats: xlog.Message): # -> kpi.Measurement|None(to retry)
_ = e
e = LogError(stats.timestamp, "internal failure")
e.__cause__ = _
logm._stats = None
logm._event = e
return None
logm._estats = e
return
# all adjustments and checks are over.
logm._m = m # we can now remember pre-built Measurement for current stats,
return p # and return adjusted previous measurement, if it was there.
logm._m = m # we can now remember our Measurement adjustments for current stats,
if m_prev is not None: # and commit adjustments to previous measurement, if it was there.
m_prev.put((0,), p) # copy m_prev <- p
return
# _stats_check verifies stats message to have required structure.
......
# -*- coding: utf-8 -*-
# Copyright (C) 2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Copyright (C) 2022-2023 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
......@@ -61,6 +61,7 @@ class tLogMeasure:
# xlog appends one line to enb.xlog.
def xlog(t, line):
trace('xlog += %s' % line)
line = b(line)
assert b'\n' not in line
pos = t._fxlog.tell()
......@@ -121,24 +122,12 @@ def test_LogMeasure():
_ = t.expect1
# empty stats after first attach
t.xlog( jstats(0.7, {}) )
t.xlog( jstats(1, {}) )
_('X.Tstart', 0.02)
_('X.δT', 0.7-0.02)
_('X.δT', 1-0.02)
t.expect_nodata()
t.read()
# note: no t.read() - see tstats
# further empty stats
t.xlog( jstats(1.0, {}) )
_('X.Tstart', 0.7)
_('X.δT', 1-0.7)
_('RRC.ConnEstabAtt.sum', 0)
_('RRC.ConnEstabSucc.sum', 0)
_('S1SIG.ConnEstabAtt', 0)
_('S1SIG.ConnEstabSucc', 0)
_('ERAB.EstabInitAttNbr.sum', 0)
_('ERAB.EstabInitSuccNbr.sum', 0)
_('ERAB.EstabAddAttNbr.sum', 0)
_('ERAB.EstabAddSuccNbr.sum', 0)
# tstats is the verb to check handling of stats message.
#
......@@ -195,6 +184,21 @@ def test_LogMeasure():
counters_prev = {} # reset
# further empty stats
tstats({})
_('X.Tstart', 1)
_('X.δT', 1)
_('RRC.ConnEstabAtt.sum', 0)
_('RRC.ConnEstabSucc.sum', 0)
_('S1SIG.ConnEstabAtt', 0)
_('S1SIG.ConnEstabSucc', 0)
_('ERAB.EstabInitAttNbr.sum', 0)
_('ERAB.EstabInitSuccNbr.sum', 0)
_('ERAB.EstabAddAttNbr.sum', 0)
_('ERAB.EstabAddSuccNbr.sum', 0)
# RRC.ConnEstab
#
# For init/fini correction LogMeasure accounts termination events in the
......@@ -373,17 +377,25 @@ def test_LogMeasure_badinput():
" but only single-cell configurations are supported"):
t.read()
tbadcell(11, 0)
read_nodata(11, 1)
tbadcell(12, 0)
read_nodata(12, 1)
tbadcell(13, 2)
read_nodata(13, 1)
tbadcell(14, 3)
def tbadstats(τ, error):
with raises(LogError, match="t%s: stats: %s" % (τ, error)):
t.read()
read_nodata(14, 7)
tbadstats(21, ":10/cells/1 no `counters`")
read_nodata(21, 1)
tbadstats(22, ":11/cells/1/counters no `messages`")
read_nodata(22, 1)
tbadstats(23, ":12/ no `counters`")
read_nodata(23, 1)
tbadstats(24, ":13/counters no `messages`")
read_nodata(24, 7)
readok(31, 5) # 31-32
def tbadline():
......@@ -414,18 +426,19 @@ def test_LogMeasure_cc_wraparound():
def readok(τ, CC_value):
_('X.Tstart', τ)
_('X.δT', 1)
_(CC, CC_value)
_('X.δT', int(τ+1)-τ)
if CC_value is not None:
_(CC, CC_value)
else:
t.expect_nodata()
t.read()
_('X.Tstart', 0.02) # attach-1
_('X.δT', 0.98)
t.expect_nodata()
t.read()
readok(0.02, None) # attach-1
readok(1, 13) # 1-2
readok(2, None) # 2-3 M(ø)
with raises(LogError, match=r"t3: cc %s↓ \(13 → 12\)" % cc):
t.read() # 2-3
t.read() # 2-3 raise
readok(3, None) # 3-4 M(ø)
readok(4, 10) # 4-5
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment