Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
de36c6be
Commit
de36c6be
authored
9 years ago
by
Kevin Modzelewski
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #676 from kmod/perf
An assortment of misc small perf changes
parents
2e9a9e35
ac3dedc2
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
77 additions
and
45 deletions
+77
-45
Makefile
Makefile
+1
-5
microbenchmarks/getattrfunc_ubench.py
microbenchmarks/getattrfunc_ubench.py
+10
-0
section_ordering.txt
section_ordering.txt
+1
-0
src/analysis/scoping_analysis.h
src/analysis/scoping_analysis.h
+5
-3
src/asm_writing/rewriter.cpp
src/asm_writing/rewriter.cpp
+3
-2
src/core/threading.cpp
src/core/threading.cpp
+11
-33
src/core/threading.h
src/core/threading.h
+31
-1
src/runtime/descr.cpp
src/runtime/descr.cpp
+3
-1
src/runtime/dict.cpp
src/runtime/dict.cpp
+5
-0
src/runtime/inline/link_forcer.cpp
src/runtime/inline/link_forcer.cpp
+3
-0
src/runtime/list.cpp
src/runtime/list.cpp
+4
-0
No files found.
Makefile
View file @
de36c6be
...
@@ -108,11 +108,7 @@ else
...
@@ -108,11 +108,7 @@ else
LLVM_BUILD
:=
$(LLVM_TRUNK_BUILD)
LLVM_BUILD
:=
$(LLVM_TRUNK_BUILD)
endif
endif
ifeq
($(FORCE_TRUNK_BINARIES),1)
LLVM_BIN
:=
./build/Release/llvm/bin
LLVM_BIN
:=
$(LLVM_TRUNK_BUILD)
/Release/bin
else
LLVM_BIN
:=
$(LLVM_BUILD)
/Release/bin
endif
LLVM_LINK_LIBS
:=
core mcjit native bitreader bitwriter ipo irreader debuginfodwarf instrumentation
LLVM_LINK_LIBS
:=
core mcjit native bitreader bitwriter ipo irreader debuginfodwarf instrumentation
ifneq
($(ENABLE_INTEL_JIT_EVENTS),0)
ifneq
($(ENABLE_INTEL_JIT_EVENTS),0)
...
...
This diff is collapsed.
Click to expand it.
microbenchmarks/getattrfunc_ubench.py
0 → 100644
View file @
de36c6be
class
C
(
object
):
pass
def
f
():
g
=
getattr
c
=
C
()
c
.
o
=
1
for
i
in
xrange
(
10000000
):
g
(
c
,
"o"
)
f
()
This diff is collapsed.
Click to expand it.
section_ordering.txt
View file @
de36c6be
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
.text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE
.text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE
.text._PyIndex_Check
.text._PyIndex_Check
.text._ZN6pyston9threading21allowGLReadPreemptionEv
.text._ZN6pyston9threading21allowGLReadPreemptionEv
.text._ZN6pyston9threading22_allowGLReadPreemptionEv
.text._ZN6pyston9getOpNameEi
.text._ZN6pyston9getOpNameEi
.text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE
.text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE
.text._ZN6pyston2gc9GCVisitor5visitEPv
.text._ZN6pyston2gc9GCVisitor5visitEPv
...
...
This diff is collapsed.
Click to expand it.
src/analysis/scoping_analysis.h
View file @
de36c6be
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
#ifndef PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#ifndef PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#define PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#define PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#include "llvm/ADT/DenseMap.h"
#include "core/common.h"
#include "core/common.h"
#include "core/stringpool.h"
#include "core/stringpool.h"
...
@@ -146,14 +148,14 @@ public:
...
@@ -146,14 +148,14 @@ public:
class
ScopingAnalysis
{
class
ScopingAnalysis
{
public:
public:
struct
ScopeNameUsage
;
struct
ScopeNameUsage
;
typedef
std
::
unordered_m
ap
<
AST
*
,
ScopeNameUsage
*>
NameUsageMap
;
typedef
llvm
::
DenseM
ap
<
AST
*
,
ScopeNameUsage
*>
NameUsageMap
;
private:
private:
std
::
unordered_m
ap
<
AST
*
,
ScopeInfo
*>
scopes
;
llvm
::
DenseM
ap
<
AST
*
,
ScopeInfo
*>
scopes
;
AST_Module
*
parent_module
;
AST_Module
*
parent_module
;
InternedStringPool
*
interned_strings
;
InternedStringPool
*
interned_strings
;
std
::
unordered_m
ap
<
AST
*
,
AST
*>
scope_replacements
;
llvm
::
DenseM
ap
<
AST
*
,
AST
*>
scope_replacements
;
ScopeInfo
*
analyzeSubtree
(
AST
*
node
);
ScopeInfo
*
analyzeSubtree
(
AST
*
node
);
void
processNameUsages
(
NameUsageMap
*
usages
);
void
processNameUsages
(
NameUsageMap
*
usages
);
...
...
This diff is collapsed.
Click to expand it.
src/asm_writing/rewriter.cpp
View file @
de36c6be
...
@@ -186,8 +186,9 @@ void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_
...
@@ -186,8 +186,9 @@ void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_
assembler
::
Register
Rewriter
::
ConstLoader
::
findConst
(
uint64_t
val
,
bool
&
found_value
)
{
assembler
::
Register
Rewriter
::
ConstLoader
::
findConst
(
uint64_t
val
,
bool
&
found_value
)
{
assert
(
rewriter
->
phase_emitting
);
assert
(
rewriter
->
phase_emitting
);
if
(
constToVar
.
count
(
val
)
>
0
)
{
auto
it
=
constToVar
.
find
(
val
);
RewriterVar
*
var
=
constToVar
[
val
];
if
(
it
!=
constToVar
.
end
())
{
RewriterVar
*
var
=
it
->
second
;
for
(
Location
l
:
var
->
locations
)
{
for
(
Location
l
:
var
->
locations
)
{
if
(
l
.
type
==
Location
::
Register
)
{
if
(
l
.
type
==
Location
::
Register
)
{
found_value
=
true
;
found_value
=
true
;
...
...
This diff is collapsed.
Click to expand it.
src/core/threading.cpp
View file @
de36c6be
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#include "core/threading.h"
#include "core/threading.h"
#include <atomic>
#include <cstdio>
#include <cstdio>
#include <cstdlib>
#include <cstdlib>
#include <err.h>
#include <err.h>
...
@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept {
...
@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept {
static
pthread_mutex_t
gil
=
PTHREAD_MUTEX_INITIALIZER
;
static
pthread_mutex_t
gil
=
PTHREAD_MUTEX_INITIALIZER
;
st
atic
st
d
::
atomic
<
int
>
threads_waiting_on_gil
(
0
);
std
::
atomic
<
int
>
threads_waiting_on_gil
(
0
);
static
pthread_cond_t
gil_acquired
=
PTHREAD_COND_INITIALIZER
;
static
pthread_cond_t
gil_acquired
=
PTHREAD_COND_INITIALIZER
;
extern
"C"
void
PyEval_ReInitThreads
()
noexcept
{
extern
"C"
void
PyEval_ReInitThreads
()
noexcept
{
...
@@ -524,9 +523,6 @@ void releaseGLWrite() {
...
@@ -524,9 +523,6 @@ void releaseGLWrite() {
pthread_mutex_unlock
(
&
gil
);
pthread_mutex_unlock
(
&
gil
);
}
}
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
int
gil_check_count
=
0
;
int
gil_check_count
=
0
;
// TODO: this function is fair in that it forces a thread to give up the GIL
// TODO: this function is fair in that it forces a thread to give up the GIL
...
@@ -535,37 +531,19 @@ int gil_check_count = 0;
...
@@ -535,37 +531,19 @@ int gil_check_count = 0;
// switching back and forth, and a third that never gets run.
// switching back and forth, and a third that never gets run.
// We could enforce fairness by having a FIFO of events (implementd with mutexes?)
// We could enforce fairness by having a FIFO of events (implementd with mutexes?)
// and make sure to always wake up the longest-waiting one.
// and make sure to always wake up the longest-waiting one.
void
allowGLReadPreemption
()
{
void
_allowGLReadPreemption
()
{
#if ENABLE_SAMPLING_PROFILER
assert
(
gil_check_count
>=
GIL_CHECK_INTERVAL
);
if
(
unlikely
(
sigprof_pending
))
{
gil_check_count
=
0
;
// Output multiple stacktraces if we received multiple signals
// between being able to handle it (such as being in LLVM or the GC),
// to try to fully account for that time.
while
(
sigprof_pending
)
{
_printStacktrace
();
sigprof_pending
--
;
}
}
#endif
// Double-checked locking: first read with no ordering constraint:
// Double check this, since if we are wrong about there being a thread waiting on the gil,
if
(
!
threads_waiting_on_gil
.
load
(
std
::
memory_order_relaxed
))
// we're going to get stuck in the following pthread_cond_wait:
if
(
!
threads_waiting_on_gil
.
load
(
std
::
memory_order_seq_cst
))
return
;
return
;
gil_check_count
++
;
threads_waiting_on_gil
++
;
if
(
gil_check_count
>=
GIL_CHECK_INTERVAL
)
{
pthread_cond_wait
(
&
gil_acquired
,
&
gil
);
gil_check_count
=
0
;
threads_waiting_on_gil
--
;
pthread_cond_signal
(
&
gil_acquired
);
// Double check this, since if we are wrong about there being a thread waiting on the gil,
// we're going to get stuck in the following pthread_cond_wait:
if
(
!
threads_waiting_on_gil
.
load
(
std
::
memory_order_seq_cst
))
return
;
threads_waiting_on_gil
++
;
pthread_cond_wait
(
&
gil_acquired
,
&
gil
);
threads_waiting_on_gil
--
;
pthread_cond_signal
(
&
gil_acquired
);
}
}
}
#elif THREADING_USE_GRWL
#elif THREADING_USE_GRWL
static
pthread_rwlock_t
grwl
=
PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
;
static
pthread_rwlock_t
grwl
=
PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
;
...
...
This diff is collapsed.
Click to expand it.
src/core/threading.h
View file @
de36c6be
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#ifndef PYSTON_CORE_THREADING_H
#ifndef PYSTON_CORE_THREADING_H
#define PYSTON_CORE_THREADING_H
#define PYSTON_CORE_THREADING_H
#include <atomic>
#include <cstdint>
#include <cstdint>
#include <cstring>
#include <cstring>
#include <ucontext.h>
#include <ucontext.h>
...
@@ -81,7 +82,36 @@ void acquireGLRead();
...
@@ -81,7 +82,36 @@ void acquireGLRead();
void
releaseGLRead
();
void
releaseGLRead
();
void
acquireGLWrite
();
void
acquireGLWrite
();
void
releaseGLWrite
();
void
releaseGLWrite
();
void
allowGLReadPreemption
();
void
_allowGLReadPreemption
();
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
extern
int
gil_check_count
;
extern
std
::
atomic
<
int
>
threads_waiting_on_gil
;
inline
void
allowGLReadPreemption
()
{
#if ENABLE_SAMPLING_PROFILER
if
(
unlikely
(
sigprof_pending
))
{
// Output multiple stacktraces if we received multiple signals
// between being able to handle it (such as being in LLVM or the GC),
// to try to fully account for that time.
while
(
sigprof_pending
)
{
_printStacktrace
();
sigprof_pending
--
;
}
}
#endif
// Double-checked locking: first read with no ordering constraint:
if
(
!
threads_waiting_on_gil
.
load
(
std
::
memory_order_relaxed
))
return
;
gil_check_count
++
;
if
(
likely
(
gil_check_count
<
GIL_CHECK_INTERVAL
))
return
;
_allowGLReadPreemption
();
}
// Note: promoteGL is free to drop the lock and then reacquire
// Note: promoteGL is free to drop the lock and then reacquire
void
promoteGL
();
void
promoteGL
();
void
demoteGL
();
void
demoteGL
();
...
...
This diff is collapsed.
Click to expand it.
src/runtime/descr.cpp
View file @
de36c6be
...
@@ -393,9 +393,11 @@ Box* BoxedWrapperDescriptor::descr_get(Box* _self, Box* inst, Box* owner) noexce
...
@@ -393,9 +393,11 @@ Box* BoxedWrapperDescriptor::descr_get(Box* _self, Box* inst, Box* owner) noexce
if
(
inst
==
None
)
if
(
inst
==
None
)
return
self
;
return
self
;
if
(
!
isSubclass
(
inst
->
cls
,
self
->
type
))
if
(
!
isSubclass
(
inst
->
cls
,
self
->
type
))
{
PyErr_Format
(
TypeError
,
"Descriptor '' for '%s' objects doesn't apply to '%s' object"
,
PyErr_Format
(
TypeError
,
"Descriptor '' for '%s' objects doesn't apply to '%s' object"
,
getFullNameOfClass
(
self
->
type
).
c_str
(),
getFullTypeName
(
inst
).
c_str
());
getFullNameOfClass
(
self
->
type
).
c_str
(),
getFullTypeName
(
inst
).
c_str
());
return
NULL
;
}
return
new
BoxedWrapperObject
(
self
,
inst
);
return
new
BoxedWrapperObject
(
self
,
inst
);
}
}
...
...
This diff is collapsed.
Click to expand it.
src/runtime/dict.cpp
View file @
de36c6be
...
@@ -249,6 +249,8 @@ extern "C" PyObject* PyDict_GetItem(PyObject* dict, PyObject* key) noexcept {
...
@@ -249,6 +249,8 @@ extern "C" PyObject* PyDict_GetItem(PyObject* dict, PyObject* key) noexcept {
return
d
->
getOrNull
(
key
);
return
d
->
getOrNull
(
key
);
}
}
// XXX this would be easy to make much faster.
// This path doesn't exist in CPython; we have it to support extension modules that do
// This path doesn't exist in CPython; we have it to support extension modules that do
// something along the lines of PyDict_GetItem(PyModule_GetDict()):
// something along the lines of PyDict_GetItem(PyModule_GetDict()):
try
{
try
{
...
@@ -304,6 +306,9 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
...
@@ -304,6 +306,9 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
}
}
extern
"C"
PyObject
*
PyDict_GetItemString
(
PyObject
*
dict
,
const
char
*
key
)
noexcept
{
extern
"C"
PyObject
*
PyDict_GetItemString
(
PyObject
*
dict
,
const
char
*
key
)
noexcept
{
if
(
dict
->
cls
==
attrwrapper_cls
)
return
unwrapAttrWrapper
(
dict
)
->
getattr
(
key
);
Box
*
key_s
;
Box
*
key_s
;
try
{
try
{
key_s
=
boxString
(
key
);
key_s
=
boxString
(
key
);
...
...
This diff is collapsed.
Click to expand it.
src/runtime/inline/link_forcer.cpp
View file @
de36c6be
...
@@ -17,6 +17,7 @@
...
@@ -17,6 +17,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/irgen/hooks.h"
#include "core/ast.h"
#include "core/ast.h"
#include "core/threading.h"
#include "core/types.h"
#include "core/types.h"
#include "gc/heap.h"
#include "gc/heap.h"
#include "runtime/complex.h"
#include "runtime/complex.h"
...
@@ -138,6 +139,8 @@ void force() {
...
@@ -138,6 +139,8 @@ void force() {
FORCE
(
boxedLocalsGet
);
FORCE
(
boxedLocalsGet
);
FORCE
(
boxedLocalsDel
);
FORCE
(
boxedLocalsDel
);
FORCE
(
threading
::
allowGLReadPreemption
);
// FORCE(listIter);
// FORCE(listIter);
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
src/runtime/list.cpp
View file @
de36c6be
...
@@ -594,6 +594,10 @@ Box* listIAdd(BoxedList* self, Box* _rhs) {
...
@@ -594,6 +594,10 @@ Box* listIAdd(BoxedList* self, Box* _rhs) {
int
s1
=
self
->
size
;
int
s1
=
self
->
size
;
int
s2
=
rhs
->
size
;
int
s2
=
rhs
->
size
;
if
(
s2
==
0
)
return
self
;
self
->
ensure
(
s1
+
s2
);
self
->
ensure
(
s1
+
s2
);
memcpy
(
self
->
elts
->
elts
+
s1
,
rhs
->
elts
->
elts
,
sizeof
(
rhs
->
elts
->
elts
[
0
])
*
s2
);
memcpy
(
self
->
elts
->
elts
+
s1
,
rhs
->
elts
->
elts
,
sizeof
(
rhs
->
elts
->
elts
[
0
])
*
s2
);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment