Commit 896bc696 authored by Kevin Modzelewski

Reduce allocations in stackmap parsing

Maybe a better overall approach is to not convert it out of its
original flat format; we keep that original memory around anyway
and I don't think it's that much faster to scan than our parsed
version.

But for now, optimize the current approach:
- convert vector->SmallVector
- call reserve() since we usually know how many elements the vectors will have
- use a StringMap instead of an unordered_map<std::string>
parent 08fe3815
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
namespace pyston { namespace pyston {
void PatchpointInfo::addFrameVar(const std::string& name, CompilerType* type) { void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
frame_vars.push_back(FrameVarInfo({.name = name, .type = type })); frame_vars.push_back(FrameVarInfo({.name = name, .type = type }));
} }
...@@ -91,7 +91,7 @@ void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) { ...@@ -91,7 +91,7 @@ void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) {
int num_args = frame_var.type->numFrameArgs(); int num_args = frame_var.type->numFrameArgs();
llvm::SmallVector<StackMap::Record::Location, 1> locations; llvm::SmallVector<StackMap::Record::Location, 1> locations;
locations.append(&r->locations[cur_arg], &r->locations[cur_arg + num_args]); locations.append(r->locations.data() + cur_arg, r->locations.data() + cur_arg + num_args);
// printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum); // printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum);
...@@ -151,7 +151,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) { ...@@ -151,7 +151,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
cf->location_map->constants = stackmap->constants; cf->location_map->constants = stackmap->constants;
for (int i = 0; i < nrecords; i++) { for (int i = 0; i < nrecords; i++) {
StackMap::Record* r = stackmap->records[i]; StackMap::Record* r = &stackmap->records[i];
assert(stackmap->stack_size_records.size() == 1); assert(stackmap->stack_size_records.size() == 1);
const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0]; const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
......
...@@ -42,7 +42,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap); ...@@ -42,7 +42,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap);
struct PatchpointInfo { struct PatchpointInfo {
public: public:
struct FrameVarInfo { struct FrameVarInfo {
std::string name; llvm::StringRef name;
CompilerType* type; CompilerType* type;
}; };
...@@ -74,7 +74,7 @@ public: ...@@ -74,7 +74,7 @@ public:
int scratchStackmapArg() { return 0; } int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); } int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
void addFrameVar(const std::string& name, CompilerType* type); void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) { void setNumFrameArgs(int num_frame_args) {
assert(num_frame_stackmap_args == -1); assert(num_frame_stackmap_args == -1);
num_frame_stackmap_args = num_frame_args; num_frame_stackmap_args = num_frame_args;
......
...@@ -70,6 +70,7 @@ StackMap* parseStackMap() { ...@@ -70,6 +70,7 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3) if (VERBOSITY() >= 3)
printf("%d functions\n", nfunctions); printf("%d functions\n", nfunctions);
cur_map->stack_size_records.reserve(nfunctions);
for (int i = 0; i < nfunctions; i++) { for (int i = 0; i < nfunctions; i++) {
const StackMap::StackSizeRecord& size_record = *ptr.size_record++; const StackMap::StackSizeRecord& size_record = *ptr.size_record++;
cur_map->stack_size_records.push_back(size_record); cur_map->stack_size_records.push_back(size_record);
...@@ -79,6 +80,7 @@ StackMap* parseStackMap() { ...@@ -79,6 +80,7 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3) if (VERBOSITY() >= 3)
printf("%d constants\n", nconstants); printf("%d constants\n", nconstants);
cur_map->constants.reserve(nconstants);
for (int i = 0; i < nconstants; i++) { for (int i = 0; i < nconstants; i++) {
uint64_t constant = *ptr.u64++; uint64_t constant = *ptr.u64++;
...@@ -89,16 +91,18 @@ StackMap* parseStackMap() { ...@@ -89,16 +91,18 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3) if (VERBOSITY() >= 3)
printf("%d records\n", nrecords); printf("%d records\n", nrecords);
cur_map->records.reserve(nrecords);
for (int i = 0; i < nrecords; i++) { for (int i = 0; i < nrecords; i++) {
StackMap::Record* record = new StackMap::Record(); cur_map->records.emplace_back();
cur_map->records.push_back(record); StackMap::Record* record = &cur_map->records.back();
record->id = *ptr.u64++; record->id = *ptr.u64++;
record->offset = *ptr.u32++; record->offset = *ptr.u32++;
record->flags = *ptr.u16++; // reserved (record flags) record->flags = *ptr.u16++; // reserved (record flags)
int numlocations = *ptr.u16++; int numlocations = *ptr.u16++;
record->locations.reserve(numlocations);
if (VERBOSITY() >= 3) if (VERBOSITY() >= 3)
printf("Stackmap record %ld at 0x%x has %d locations:\n", record->id, record->offset, numlocations); printf("Stackmap record %ld at 0x%x has %d locations:\n", record->id, record->offset, numlocations);
...@@ -125,6 +129,7 @@ StackMap* parseStackMap() { ...@@ -125,6 +129,7 @@ StackMap* parseStackMap() {
ptr.u16++; // padding ptr.u16++; // padding
int num_live_outs = *ptr.u16++; int num_live_outs = *ptr.u16++;
record->live_outs.reserve(num_live_outs);
for (int i = 0; i < num_live_outs; i++) { for (int i = 0; i < num_live_outs; i++) {
const StackMap::Record::LiveOut& r = *ptr.record_liveout++; const StackMap::Record::LiveOut& r = *ptr.record_liveout++;
record->live_outs.push_back(r); record->live_outs.push_back(r);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <vector> #include <vector>
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
namespace llvm { namespace llvm {
class JITEventListener; class JITEventListener;
...@@ -61,20 +62,20 @@ struct StackMap { ...@@ -61,20 +62,20 @@ struct StackMap {
uint64_t id; uint64_t id;
uint32_t offset; uint32_t offset;
uint16_t flags; uint16_t flags;
std::vector<Location> locations; llvm::SmallVector<Location, 8> locations;
std::vector<LiveOut> live_outs; llvm::SmallVector<LiveOut, 8> live_outs;
}; };
std::vector<StackSizeRecord> stack_size_records; llvm::SmallVector<StackSizeRecord, 1> stack_size_records;
uint32_t header; uint32_t header;
std::vector<uint64_t> constants; llvm::SmallVector<uint64_t, 8> constants;
std::vector<Record*> records; std::vector<Record> records;
}; };
// TODO this belongs somewhere else? // TODO this belongs somewhere else?
class LocationMap { class LocationMap {
public: public:
std::vector<uint64_t> constants; llvm::SmallVector<uint64_t, 8> constants;
StackMap::Record::Location frame_info_location; StackMap::Record::Location frame_info_location;
bool frameInfoFound() { return frame_info_location.type != 0; } bool frameInfoFound() { return frame_info_location.type != 0; }
...@@ -88,10 +89,10 @@ public: ...@@ -88,10 +89,10 @@ public:
CompilerType* type; CompilerType* type;
llvm::SmallVector<StackMap::Record::Location, 1> locations; llvm::SmallVector<StackMap::Record::Location, 1> locations;
}; };
std::vector<LocationEntry> locations; llvm::SmallVector<LocationEntry, 2> locations;
}; };
std::unordered_map<std::string, LocationTable> names; llvm::StringMap<LocationTable> names;
}; };
StackMap* parseStackMap(); StackMap* parseStackMap();
......
...@@ -866,7 +866,7 @@ DeoptState getDeoptState() { ...@@ -866,7 +866,7 @@ DeoptState getDeoptState() {
std::unordered_set<std::string> is_undefined; std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) { for (const auto& p : cf->location_map->names) {
if (!startswith(p.first, "!is_defined_")) if (!startswith(p.first(), "!is_defined_"))
continue; continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) { for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
...@@ -876,7 +876,7 @@ DeoptState getDeoptState() { ...@@ -876,7 +876,7 @@ DeoptState getDeoptState() {
assert(locs.size() == 1); assert(locs.size() == 1);
uint64_t v = frame_iter->readLocation(locs[0]); uint64_t v = frame_iter->readLocation(locs[0]);
if ((v & 1) == 0) if ((v & 1) == 0)
is_undefined.insert(p.first.substr(12)); is_undefined.insert(p.first().substr(12));
break; break;
} }
...@@ -884,10 +884,10 @@ DeoptState getDeoptState() { ...@@ -884,10 +884,10 @@ DeoptState getDeoptState() {
} }
for (const auto& p : cf->location_map->names) { for (const auto& p : cf->location_map->names) {
if (p.first[0] == '!') if (p.first()[0] == '!')
continue; continue;
if (is_undefined.count(p.first)) if (is_undefined.count(p.first()))
continue; continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) { for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
...@@ -895,16 +895,16 @@ DeoptState getDeoptState() { ...@@ -895,16 +895,16 @@ DeoptState getDeoptState() {
const auto& locs = e.locations; const auto& locs = e.locations;
llvm::SmallVector<uint64_t, 1> vals; llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str()); // printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
for (auto& loc : locs) { for (auto& loc : locs) {
vals.push_back(frame_iter->readLocation(loc)); vals.push_back(frame_iter->readLocation(loc));
} }
Box* v = e.type->deserializeFromFrame(vals); Box* v = e.type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v); // printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
ASSERT(gc::isValidGCObject(v), "%p", v); ASSERT(gc::isValidGCObject(v), "%p", v);
d->d[boxString(p.first)] = v; d->d[boxString(p.first())] = v;
} }
} }
} }
...@@ -961,7 +961,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -961,7 +961,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
std::unordered_set<std::string> is_undefined; std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) { for (const auto& p : cf->location_map->names) {
if (!startswith(p.first, "!is_defined_")) if (!startswith(p.first(), "!is_defined_"))
continue; continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) { for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
...@@ -971,7 +971,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -971,7 +971,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
assert(locs.size() == 1); assert(locs.size() == 1);
uint64_t v = impl->readLocation(locs[0]); uint64_t v = impl->readLocation(locs[0]);
if ((v & 1) == 0) if ((v & 1) == 0)
is_undefined.insert(p.first.substr(12)); is_undefined.insert(p.first().substr(12));
break; break;
} }
...@@ -979,13 +979,13 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -979,13 +979,13 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
} }
for (const auto& p : cf->location_map->names) { for (const auto& p : cf->location_map->names) {
if (p.first[0] == '!') if (p.first()[0] == '!')
continue; continue;
if (p.first[0] == '#') if (p.first()[0] == '#')
continue; continue;
if (is_undefined.count(p.first)) if (is_undefined.count(p.first()))
continue; continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) { for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
...@@ -993,7 +993,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -993,7 +993,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
const auto& locs = e.locations; const auto& locs = e.locations;
llvm::SmallVector<uint64_t, 1> vals; llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str()); // printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
// printf("%ld locs\n", locs.size()); // printf("%ld locs\n", locs.size());
for (auto& loc : locs) { for (auto& loc : locs) {
...@@ -1004,9 +1004,9 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -1004,9 +1004,9 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
} }
Box* v = e.type->deserializeFromFrame(vals); Box* v = e.type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v); // printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
assert(gc::isValidGCObject(v)); assert(gc::isValidGCObject(v));
d->d[boxString(p.first)] = v; d->d[boxString(p.first())] = v;
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment