Commit 9cc31938 authored by Umesh Nerlige Ramappa's avatar Umesh Nerlige Ramappa

i915/perf: Drop the aging_tail logic in perf OA

On DG2, capturing OA reports while running heavy render workloads
sometimes results in invalid OA reports where 64-byte chunks inside
reports have stale values. Under memory pressure, high OA sampling rates
(13.3 us) and heavy render workload, occasionally, the OA HW TAIL
pointer does not progress as fast as the sampling rate. When these
glitches occur, the TAIL pointer takes approx. 200us to progress.  While
this is expected behavior from the HW perspective, invalid reports are
not expected.

In oa_buffer_check_unlocked(), when we execute the if condition, we are
updating the oa_buffer.tail to the aging tail and then setting pollin
based on this tail value, however, we do not have a chance to rewind and
validate the reports prior to setting pollin. The validation happens
in a subsequent call to oa_buffer_check_unlocked(). If a read occurs
before this validation, then we end up reading reports up until this
oa_buffer.tail value which includes invalid reports. Though found on
DG2, this affects all platforms.

The aging tail logic is no longer necessary since we are explicitly
checking for landed reports.

Start by dropping the aging tail logic.

v2:
- Drop extra blank line
- Add reason to drop aging logic (Ashutosh)
- Add bug links (Ashutosh)
- rename aged_tail to read_tail
- Squash patches 3 and 1

v3: (Ashutosh)
- Remove extra spaces
- Remove gtt_offset from the pollin calculation
- s/Bug:/Link/ in commit message (checkpatch)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7484
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7757Signed-off-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: default avatarAshutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230605193923.1836048-2-umesh.nerlige.ramappa@intel.com
parent 81b1b599
...@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) ...@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.) * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
* *
* Besides returning true when there is data available to read() this function * Besides returning true when there is data available to read() this function
* also updates the tail, aging_tail and aging_timestamp in the oa_buffer * also updates the tail in the oa_buffer object.
* object.
* *
* Note: It's safe to read OA config state here unlocked, assuming that this is * Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration * only called while the stream is enabled, while the global OA configuration
...@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) ...@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{ {
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format->size; int report_size = stream->oa_buffer.format->size;
u32 head, tail, read_tail;
unsigned long flags; unsigned long flags;
bool pollin; bool pollin;
u32 hw_tail; u32 hw_tail;
u64 now;
u32 partial_report_size; u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors /* We have to consider the (unlikely) possibility that read() errors
...@@ -568,62 +567,47 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) ...@@ -568,62 +567,47 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
/* Subtract partial amount off the tail */ /* Subtract partial amount off the tail */
hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size); hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
now = ktime_get_mono_fast_ns(); /* NB: The head we observe here might effectively be a little
* out of date. If a read() is in progress, the head could be
if (hw_tail == stream->oa_buffer.aging_tail && * anywhere between this head and stream->oa_buffer.tail.
(now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) { */
/* If the HW tail hasn't move since the last check and the HW head = stream->oa_buffer.head - gtt_offset;
* tail has been aging for long enough, declare it the new read_tail = stream->oa_buffer.tail - gtt_offset;
* tail.
*/ hw_tail -= gtt_offset;
stream->oa_buffer.tail = stream->oa_buffer.aging_tail; tail = hw_tail;
} else {
u32 head, tail, aged_tail; /* Walk the stream backward until we find a report with report
* id and timestmap not at 0. Since the circular buffer pointers
/* NB: The head we observe here might effectively be a little * progress by increments of 64 bytes and that reports can be up
* out of date. If a read() is in progress, the head could be * to 256 bytes long, we can't tell whether a report has fully
* anywhere between this head and stream->oa_buffer.tail. * landed in memory before the report id and timestamp of the
*/ * following report have effectively landed.
head = stream->oa_buffer.head - gtt_offset; *
aged_tail = stream->oa_buffer.tail - gtt_offset; * This is assuming that the writes of the OA unit land in
* memory in the order they were written to.
hw_tail -= gtt_offset; * If not : (╯°□°)╯︵ ┻━┻
tail = hw_tail; */
while (OA_TAKEN(tail, read_tail) >= report_size) {
/* Walk the stream backward until we find a report with report void *report = stream->oa_buffer.vaddr + tail;
* id and timestmap not at 0. Since the circular buffer pointers
* progress by increments of 64 bytes and that reports can be up
* to 256 bytes long, we can't tell whether a report has fully
* landed in memory before the report id and timestamp of the
* following report have effectively landed.
*
* This is assuming that the writes of the OA unit land in
* memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
*/
while (OA_TAKEN(tail, aged_tail) >= report_size) {
void *report = stream->oa_buffer.vaddr + tail;
if (oa_report_id(stream, report) || if (oa_report_id(stream, report) ||
oa_timestamp(stream, report)) oa_timestamp(stream, report))
break; break;
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
} }
if (OA_TAKEN(hw_tail, tail) > report_size && if (OA_TAKEN(hw_tail, tail) > report_size &&
__ratelimit(&stream->perf->tail_pointer_race)) __ratelimit(&stream->perf->tail_pointer_race))
drm_notice(&stream->uncore->i915->drm, drm_notice(&stream->uncore->i915->drm,
"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
head, tail, hw_tail); head, tail, hw_tail);
stream->oa_buffer.tail = gtt_offset + tail; stream->oa_buffer.tail = gtt_offset + tail;
stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
stream->oa_buffer.aging_timestamp = now;
}
pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset, pollin = OA_TAKEN(stream->oa_buffer.tail,
stream->oa_buffer.head - gtt_offset) >= report_size; stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
...@@ -1727,7 +1711,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1727,7 +1711,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M); gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
...@@ -1779,7 +1762,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1779,7 +1762,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
/* /*
...@@ -1833,7 +1815,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) ...@@ -1833,7 +1815,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset & GEN12_OAG_OATAILPTR_MASK); gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */ /* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
stream->oa_buffer.tail = gtt_offset; stream->oa_buffer.tail = gtt_offset;
/* /*
......
...@@ -312,18 +312,6 @@ struct i915_perf_stream { ...@@ -312,18 +312,6 @@ struct i915_perf_stream {
*/ */
spinlock_t ptr_lock; spinlock_t ptr_lock;
/**
* @aging_tail: The last HW tail reported by HW. The data
* might not have made it to memory yet though.
*/
u32 aging_tail;
/**
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
* was read; used to determine when it is old enough to trust.
*/
u64 aging_timestamp;
/** /**
* @head: Although we can always read back the head pointer register, * @head: Although we can always read back the head pointer register,
* we prefer to avoid trusting the HW state, just to avoid any * we prefer to avoid trusting the HW state, just to avoid any
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment