Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
apachedex
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Sebastien Robin
apachedex
Commits
75fdd631
Commit
75fdd631
authored
Apr 03, 2013
by
Vincent Pelletier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add optional detailed error analysis.
parent
724784ae
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
52 additions
and
7 deletions
+52
-7
apachedex/__init__.py
apachedex/__init__.py
+52
-7
No files found.
apachedex/__init__.py
View file @
75fdd631
...
...
@@ -33,7 +33,7 @@
# - provide some form of raw data output, not just html
# - allow user to specify min & max dates
from
cgi
import
escape
from
collections
import
defaultdict
from
collections
import
defaultdict
,
Counter
from
datetime
import
datetime
,
tzinfo
,
timedelta
from
functools
import
partial
from
operator
import
itemgetter
...
...
@@ -68,7 +68,10 @@ US_PER_S = 10 ** 6
N_SLOWEST
=
20
N_SLOWEST_THRESHOLD
=
N_SLOWEST
*
4
N_ERROR_URL
=
10
N_REFERRER_PER_ERROR_URL
=
5
ITEMGETTER0
=
itemgetter
(
0
)
ITEMGETTER1
=
itemgetter
(
1
)
APDEX_TOLERATING_COEF
=
4
def
statusIsError
(
status
):
...
...
@@ -124,15 +127,17 @@ class APDEXStats(object):
return
0
class
GenericSiteStats
(
object
):
def
__init__
(
self
,
threshold
,
prefix
=
1
):
def
__init__
(
self
,
threshold
,
prefix
=
1
,
error_detail
=
False
):
self
.
threshold
=
threshold
self
.
prefix
=
prefix
self
.
error_detail
=
error_detail
self
.
status
=
defaultdict
(
partial
(
defaultdict
,
int
))
if
error_detail
:
self
.
error_url_count
=
defaultdict
(
partial
(
defaultdict
,
list
))
self
.
slowest_list
=
[(
-
1
,
None
,
None
,
None
)]
self
.
apdex
=
defaultdict
(
partial
(
APDEXStats
,
threshold
))
def
accumulate
(
self
,
match
,
url_match
,
date
):
self
.
status
[
match
.
group
(
'status'
)][
date
]
+=
1
self
.
apdex
[
date
].
accumulate
(
match
)
duration
=
int
(
match
.
group
(
'duration'
))
if
url_match
is
None
:
...
...
@@ -145,6 +150,11 @@ class GenericSiteStats(object):
match
.
group
(
'referer'
)))
if
len
(
slowest_list
)
>
N_SLOWEST_THRESHOLD
:
self
.
_housekeeping
()
status
=
match
.
group
(
'status'
)
self
.
status
[
status
][
date
]
+=
1
if
self
.
error_detail
and
statusIsError
(
status
):
# XXX: can eat memory if there are many errors on many different urls
self
.
error_url_count
[
status
][
url
].
append
(
match
.
group
(
'referer'
))
def
_housekeeping
(
self
):
slowest_list
=
self
.
slowest_list
...
...
@@ -196,7 +206,37 @@ class GenericSiteStats(object):
for
date
in
column_list
:
append
(
hitTd
(
data_dict
[
date
],
status
))
append
(
'</tr>'
)
append
(
'</table><h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
append
(
'</table>'
)
if
self
.
error_detail
:
def
getHitForUrl
(
referer_counter
):
return
sum
(
referer_counter
.
itervalues
())
filtered_status_url
=
defaultdict
(
partial
(
defaultdict
,
dict
))
for
status
,
url_dict
in
self
.
error_url_count
.
iteritems
():
filtered_status_url
[
status
]
=
sorted
(
((
key
,
Counter
(
value
))
for
key
,
value
in
url_dict
.
iteritems
()),
key
=
lambda
x
:
getHitForUrl
(
x
[
1
]),
reverse
=
True
)[:
N_ERROR_URL
]
append
(
'<h3>Error detail</h3><table><tr><th>status</th><th>hit</th>'
'<th>url</th><th>referers</th></tr>'
)
for
status
,
url_list
in
sorted
(
filtered_status_url
.
iteritems
(),
key
=
ITEMGETTER0
):
append
(
'<tr><th rowspan="%s">%s</th>'
%
(
len
(
url_list
),
status
))
first_url
=
True
for
url
,
referer_counter
in
url_list
:
if
first_url
:
first_url
=
False
else
:
append
(
'<tr>'
)
append
(
'<td>%s</td><td class="text">%s</td>'
'<td class="text">%s</td>'
%
(
getHitForUrl
(
referer_counter
),
url
,
'<br/>'
.
join
(
'%i: %s'
%
(
hit
,
referer
)
for
referer
,
hit
in
sorted
(
referer_counter
.
iteritems
(),
key
=
ITEMGETTER1
,
reverse
=
True
)[:
N_REFERRER_PER_ERROR_URL
]),
))
append
(
'</tr>'
)
append
(
'</table>'
)
append
(
'<h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
'<th>date</th><th>url</th><th>referer</th></tr>'
)
for
duration
,
timestamp
,
url
,
referer
in
reversed
(
self
.
slowest_list
):
if
timestamp
is
None
:
...
...
@@ -221,8 +261,9 @@ class ERP5SiteStats(GenericSiteStats):
- If a line belongs to a module and has at least 2 slashes after module,
count line as belonging to a document of that module
"""
def
__init__
(
self
,
threshold
,
prefix
=
1
):
super
(
ERP5SiteStats
,
self
).
__init__
(
threshold
,
prefix
=
prefix
)
def
__init__
(
self
,
threshold
,
prefix
=
1
,
error_detail
=
False
):
super
(
ERP5SiteStats
,
self
).
__init__
(
threshold
,
prefix
=
prefix
,
error_detail
=
error_detail
)
# Key levels:
# - module id (string)
# - is document (bool)
...
...
@@ -355,6 +396,8 @@ def main():
parser
.
add_argument
(
'-a'
,
'--apdex'
,
default
=
US_PER_S
,
type
=
int
,
help
=
'First threshold for Apdex computation, in microseconds. '
'Default: %(default)r'
)
parser
.
add_argument
(
'-e'
,
'--error-detail'
,
action
=
'store_true'
,
help
=
'Include detailed report (url & referers) for error statuses.'
)
parser
.
add_argument
(
'-d'
,
'--default'
,
help
=
'Caption for lines matching no prefix, or skip them if not provided.'
)
parser
.
add_argument
(
'--base'
,
dest
=
'site_list'
,
default
=
[],
...
...
@@ -418,6 +461,7 @@ def main():
infile_list
=
args
.
logfile
quiet
=
args
.
quiet
threshold
=
args
.
apdex
error_detail
=
args
.
error_detail
file_count
=
len
(
infile_list
)
per_site
=
{}
hit_per_day
=
defaultdict
(
int
)
...
...
@@ -469,7 +513,8 @@ def main():
try
:
site_data
=
per_site
[
site
]
except
KeyError
:
site_data
=
per_site
[
site
]
=
action
(
threshold
)
site_data
=
per_site
[
site
]
=
action
(
threshold
,
error_detail
=
error_detail
)
site_data
.
accumulate
(
match
,
url_match
,
date
)
all_lines
+=
lineno
end_parsing_time
=
time
.
time
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment