Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
apachedex
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Jérome Perrin
apachedex
Commits
d14743d9
Commit
d14743d9
authored
Apr 07, 2013
by
Vincent Pelletier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement automatic period selection.
parent
2c0837f3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
88 additions
and
6 deletions
+88
-6
TODO
TODO
+0
-1
apachedex/__init__.py
apachedex/__init__.py
+88
-5
No files found.
TODO
View file @
d14743d9
- use some templating system instead of hardcoded html strings
- provide some form of raw data output, not just html
- allow user to specify min & max dates
- automatically select period from log data ?
apachedex/__init__.py
View file @
d14743d9
...
...
@@ -66,6 +66,7 @@ N_REFERRER_PER_ERROR_URL = 5
ITEMGETTER0 = itemgetter(0)
ITEMGETTER1 = itemgetter(1)
APDEX_TOLERATING_COEF = 4
# Automatic period selection: each period's placeholder delta is multiplied by
# this coefficient to get the time span beyond which (latest date further than
# earliest date + span) main() switches to the next period.
AUTO_PERIOD_COEF = 200
def statusIsError(status):
    """Tell whether a status string denotes an error.

    Only the first character is examined: the status is an error when that
    character compares strictly greater than '3' (presumably 4xx/5xx HTTP
    statuses - confirm against callers).
    """
    leading_char = status[0]
    return leading_char > '3'
...
...
@@ -260,6 +261,18 @@ class GenericSiteStats(object):
self
.
url_apdex
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
self
.
apdex
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
def rescale(self, convert, getDuration):
    """Re-bucket per-date statistics onto a new date granularity.

    convert
      Callable mapping an existing date key to its key in the new
      granularity. Several old keys may map to the same new key, in which
      case their data is merged.
    getDuration
      Stored on the instance and handed to every newly created APDEXStats.

    Per-status hit counts are summed per converted date; apdex entries are
    merged with APDEXStats.accumulateFrom.
    """
    self.getDuration = getDuration
    for status_code, old_count_dict in self.status.iteritems():
        merged_count_dict = defaultdict(int)
        for old_date, hit_count in old_count_dict.iteritems():
            merged_count_dict[convert(old_date)] += hit_count
        # Replacing the value of an existing key is safe while iterating.
        self.status[status_code] = merged_count_dict
    apdex_factory = partial(APDEXStats, self.threshold, getDuration)
    merged_apdex = defaultdict(apdex_factory)
    for old_date, apdex_data in self.apdex.iteritems():
        merged_apdex[convert(old_date)].accumulateFrom(apdex_data)
    self.apdex = merged_apdex
def
accumulate
(
self
,
match
,
url_match
,
date
):
self
.
apdex
[
date
].
accumulate
(
match
)
if
url_match
is
None
:
...
...
@@ -381,6 +394,20 @@ class ERP5SiteStats(GenericSiteStats):
defaultdict
,
partial
(
APDEXStats
,
threshold
,
getDuration
))))
self
.
no_module
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
def rescale(self, convert, getDuration):
    """Re-bucket ERP5-specific per-date statistics onto a new granularity.

    Delegates to the parent class rescale first, then applies the same
    conversion to every date dict found under self.module and to
    self.no_module.

    convert
      Callable mapping an existing date key to its key in the new
      granularity.
    getDuration
      Handed to every newly created APDEXStats.
    """
    super(ERP5SiteStats, self).rescale(convert, getDuration)
    threshold = self.threshold

    def rebucket(old_date_dict):
        # Merge each entry into the bucket its converted date maps to.
        merged = defaultdict(partial(APDEXStats, threshold, getDuration))
        for old_date, apdex_data in old_date_dict.iteritems():
            merged[convert(old_date)].accumulateFrom(apdex_data)
        return merged

    for per_document in self.module.itervalues():
        for is_document, old_date_dict in per_document.iteritems():
            # Replacing the value of an existing key is safe while iterating.
            per_document[is_document] = rebucket(old_date_dict)
    self.no_module = rebucket(self.no_module)
def
accumulate
(
self
,
match
,
url_match
,
date
):
split
=
self
.
suffix
(
url_match
.
group
(
'url'
)).
split
(
'?'
,
1
)[
0
].
split
(
'/'
)
if
split
and
split
[
0
].
endswith
(
'_module'
):
...
...
@@ -527,6 +554,9 @@ def _weekStringAsQuarterString(timestamp):
year
,
month
,
_
=
timestamp
.
split
(
'/'
)
return
'%s/%02i'
%
(
year
,
int
(
month
)
/
3
*
3
+
1
)
def
_roundWeek
(
dt
):
return
dt
.
replace
(
day
=
dt
.
day
/
7
*
7
+
1
)
def
_asDayString
(
timestamp
):
dt
,
_
=
timestamp
.
split
(
' '
)
day
,
month
,
year
=
dt
.
split
(
':'
,
1
)[
0
].
split
(
'/'
)
...
...
@@ -539,6 +569,9 @@ def _as6HourString(timestamp):
return
'%s/%02i/%s %02i'
%
(
year
,
MONTH_VALUE_DICT
[
month
],
day
,
int
(
hour
)
/
6
*
6
)
def
_round6Hour
(
dt
):
return
dt
.
replace
(
hour
=
dt
.
hour
/
6
*
6
)
def
_hourAsWeekString
(
timestamp
):
dt
=
datetime
.
strptime
(
timestamp
,
'%Y/%m/%d %H'
)
return
(
dt
-
timedelta
(
dt
.
weekday
())).
date
().
strftime
(
'%Y/%m/%d'
)
...
...
@@ -559,6 +592,8 @@ def _asHourString(timestamp):
# datetime.datetime instance
# - period during which a placeholder point will be added if there is no data
# point
# - function rounding a datetime.datetime instance so that, once formatted with
# the given format string, it is a valid graph-granularity date for the period
period_parser
=
{
'year'
:
(
_asMonthString
,
...
...
@@ -567,6 +602,7 @@ period_parser = {
'%Y/%m'
,
# Longest month: 31 days
timedelta
(
31
),
lambda
x
:
x
,
),
'quarter'
:
(
_asWeekString
,
...
...
@@ -576,6 +612,7 @@ period_parser = {
'7 days'
,
'%Y/%m/%d'
,
timedelta
(
7
),
_roundWeek
,
),
'month'
:
(
_asDayString
,
...
...
@@ -584,6 +621,7 @@ period_parser = {
'%Y/%m/%d'
,
# Longest day: 24 hours + 1h DST (never more ?)
timedelta
(
seconds
=
3600
*
25
),
lambda
x
:
x
,
),
'week'
:
(
_as6HourString
,
...
...
@@ -591,6 +629,7 @@ period_parser = {
'6 hours'
,
'%Y/%m/%d %H'
,
timedelta
(
seconds
=
3600
*
6
),
_round6Hour
,
),
'day'
:
(
_asHourString
,
...
...
@@ -599,6 +638,7 @@ period_parser = {
'%Y/%m/%d %H'
,
# Longest hour: 60 * 60 seconds + 1 leap second.
timedelta
(
seconds
=
3601
),
lambda
x
:
x
,
),
}
...
...
@@ -623,8 +663,12 @@ def main():
'Default: %(default).2fs'
)
group
.
add_argument
(
'-e'
,
'--error-detail'
,
action
=
'store_true'
,
help
=
'Include detailed report (url & referers) for error statuses.'
)
group
.
add_argument
(
'-p'
,
'--period'
,
default
=
'day'
,
choices
=
period_parser
,
help
=
'Periodicity of sampling buckets. Default: %(default)r'
)
group
.
add_argument
(
'-p'
,
'--period'
,
choices
=
period_parser
,
help
=
'Periodicity of sampling buckets. Default: (decide from data). '
'Performance note: leaving out this parameter reduces parsing '
'performance, as each period increase requires re-dispatching already '
'processed data. To mitigate this, provide earliest and latest log '
'files before all others (ex: log0 log3 log1 log2).'
)
group
.
add_argument
(
'-s'
,
'--stats'
,
action
=
'store_true'
,
help
=
'Enable parsing stats (time spent parsing input, time spent '
'generating output, ...)'
)
...
...
@@ -690,8 +734,23 @@ def main():
assert
not
key
,
key
matchline
=
re
.
compile
(
line_regex
).
match
matchrequest
=
REQUEST_PATTERN
.
match
asDate
,
decimator
,
graph_period
,
date_format
,
placeholder_delta
=
\
period_parser
[
args
.
period
]
if
args
.
period
is
None
:
next_period_data
=
((
x
,
y
[
4
]
*
AUTO_PERIOD_COEF
)
for
(
x
,
y
)
in
sorted
(
period_parser
.
iteritems
(),
key
=
lambda
x
:
x
[
1
][
4
])).
next
period
,
to_next_period
=
next_period_data
()
earliest_date
=
latest_date
=
None
def
getNextPeriod
():
# datetime is slow (compared to string operations), but not many choices
return
(
datetime
.
strptime
(
earliest_date
,
date_format
)
+
to_next_period
).
strftime
(
date_format
)
def
rescale
(
x
):
result
=
round_date
(
datetime
.
strptime
(
x
,
old_date_format
)).
strftime
(
date_format
)
return
result
else
:
to_next_period
=
None
period
=
args
.
period
asDate
,
decimator
,
graph_period
,
date_format
,
placeholder_delta
,
\
round_date
=
period_parser
[
period
]
site_list
=
args
.
path
default_site
=
args
.
default
if
default_site
is
None
:
...
...
@@ -757,6 +816,30 @@ def main():
skipped_lines
+=
1
continue
date
=
asDate
(
match
.
group
(
'timestamp'
))
if
to_next_period
is
not
None
:
if
date
>
latest_date
:
# '' > None is True
latest_date
=
date
if
date
<
earliest_date
or
earliest_date
is
None
:
earliest_date
=
date
next_period
=
getNextPeriod
()
if
latest_date
>
next_period
:
try
:
while
latest_date
>
next_period
:
period
,
to_next_period
=
next_period_data
()
next_period
=
getNextPeriod
()
except
StopIteration
:
pass
print
>>
sys
.
stderr
,
'Increasing period to'
,
period
,
'...'
,
old_date_format
=
date_format
asDate
,
decimator
,
graph_period
,
date_format
,
placeholder_delta
,
\
round_date
=
period_parser
[
period
]
period_increase_start
=
time
.
time
()
print
old_date_format
,
date_format
for
site_data
in
per_site
.
itervalues
():
site_data
.
rescale
(
rescale
,
getDuration
)
print
>>
sys
.
stderr
,
'done (%s)'
%
timedelta
(
seconds
=
time
.
time
()
-
period_increase_start
)
date
=
asDate
(
match
.
group
(
'timestamp'
))
try
:
site_data
=
per_site
[
site
]
except
KeyError
:
...
...
@@ -793,7 +876,7 @@ def main():
'<table class="stats">'
)
for
caption
,
value
in
(
(
'apdex threshold'
,
'%.2fs'
%
args
.
apdex
),
(
'period'
,
args
.
period
),
(
'period'
,
args
.
period
or
(
period
+
' (auto)'
)
),
):
out
.
write
(
'<tr><th class="text">%s</th><td>%s</td></tr>'
%
(
caption
,
value
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment