Commit 67a20e2c authored by Patrick Bajao

Cache references in file

This reduces RAM usage when parsing references by storing them in files
through the existing `cache` struct instead of keeping them in a map.

Reduces memory usage when parsing the LSIF file of gitlab-workhorse
from 7MB to 4MB.
parent 94ada84e
---
title: Cache references in file
merge_request: 544
author:
type: performance
......@@ -13,11 +13,10 @@ const (
)
// Ranges bundles the DefRefs map with the References and Hovers stores and the
// cache that NewRanges creates under the name "ranges". Close closes Cache,
// References and Hovers.
//
// NOTE(review): this listing looks like a flattened diff, so the field list
// appears twice; the copy that still carries ProcessReferences is presumably
// the pre-change version. Confirm against the real file.
type Ranges struct {
DefRefs map[Id]Item
References *References
Hovers *Hovers
Cache *cache
ProcessReferences bool
DefRefs map[Id]Item
References *References
Hovers *Hovers
Cache *cache
}
type RawRange struct {
......@@ -57,6 +56,11 @@ func NewRanges(config Config) (*Ranges, error) {
return nil, err
}
references, err := NewReferences(config)
if err != nil {
return nil, err
}
cache, err := newCache(config.TempPath, "ranges", Range{})
if err != nil {
return nil, err
......@@ -64,7 +68,7 @@ func NewRanges(config Config) (*Ranges, error) {
return &Ranges{
DefRefs: make(map[Id]Item),
References: NewReferences(config),
References: references,
Hovers: hovers,
Cache: cache,
}, nil
......@@ -128,6 +132,7 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error
// Close closes the range cache, the references store and the hovers store, in
// that order, and hands the three results to combineErrors to produce the
// returned error. Every Close call is made even if an earlier one failed.
func (r *Ranges) Close() error {
	cacheErr := r.Cache.Close()
	referencesErr := r.References.Close()
	hoversErr := r.Hovers.Close()

	return combineErrors(cacheErr, referencesErr, hoversErr)
}
......@@ -192,7 +197,9 @@ func (r *Ranges) addItem(line []byte) error {
}
}
r.References.Store(rawItem.RefId, references)
if err := r.References.Store(rawItem.RefId, references); err != nil {
return err
}
return nil
}
......
......@@ -4,8 +4,15 @@ import (
"strconv"
)
// ReferencesOffset locates the items stored for one reference id. Store writes
// one of these into the Offsets cache per refId, and getItems reads it back to
// find the matching entry in the Items cache.
type ReferencesOffset struct {
// Id is the value of CurrentOffsetId at the time the items were stored; it is
// the key under which the items were written to the Items cache.
Id Id
// Len is the number of items that were stored (len of the slice, as int32).
Len int32
}
// References keeps the items recorded per reference id. Store writes them and
// For/getItems read them back.
//
// NOTE(review): this listing looks like a flattened diff; the map-typed Items
// line is presumably the pre-change field and the *cache one its replacement.
type References struct {
Items map[Id][]Item
// Items is the cache created by NewReferences under the name "references".
Items *cache
// Offsets is the cache created under the name "references-offsets"; it holds
// one ReferencesOffset per refId.
Offsets *cache
// CurrentOffsetId starts at 0 and is advanced by Store by the number of items
// written, so each stored slice gets its own key in Items.
CurrentOffsetId Id
// ProcessReferences is copied from Config; when false, Store does nothing.
ProcessReferences bool
}
......@@ -13,17 +20,50 @@ type SerializedReference struct {
Path string `json:"path"`
}
// NewReferences builds a References whose storage consists of two caches
// created under config.TempPath: "references" (for Item values) and
// "references-offsets" (for ReferencesOffset values). CurrentOffsetId starts at
// 0 and ProcessReferences is taken from config. An error from either newCache
// call is returned with a nil *References.
//
// NOTE(review): this listing looks like a flattened diff; the signature without
// an error result, the `make(map[Id][]Item)` initializer and the bare closing
// brace are presumably the pre-change lines.
func NewReferences(config Config) *References {
func NewReferences(config Config) (*References, error) {
tempPath := config.TempPath
items, err := newCache(tempPath, "references", Item{})
if err != nil {
return nil, err
}
offsets, err := newCache(tempPath, "references-offsets", ReferencesOffset{})
if err != nil {
// NOTE(review): items is not closed on this path; consider closing it before
// returning so the first cache is not left open.
return nil, err
}
return &References{
Items: make(map[Id][]Item),
Items: items,
Offsets: offsets,
CurrentOffsetId: 0,
ProcessReferences: config.ProcessReferences,
}
}, nil
}
func (r *References) Store(refId Id, references []Item) {
if r.ProcessReferences {
r.Items[refId] = references
// Store records the references belonging to refId so that `For` can later
// serialize them.
//
// It plays the role of an assignment into a `map[Id][]Item` keyed by refId, but
// the data is kept in two file-backed caches to reduce RAM usage: the slice is
// written to `Items` under the current offset id, and a `ReferencesOffset`
// (that offset id plus the slice length) is written to `Offsets` under refId.
// The items can be fetched again by calling `getItems`.
//
// Store does nothing and returns nil when reference processing is disabled or
// when references is empty. Otherwise it returns the first error reported by
// either cache write, or nil when both succeed.
func (r *References) Store(refId Id, references []Item) error {
	size := len(references)

	if !r.ProcessReferences || size == 0 {
		return nil
	}

	if err := r.Items.SetEntry(r.CurrentOffsetId, references); err != nil {
		return err
	}

	// The offset entry is the only way to find the items written above, so a
	// failure here must be reported rather than silently dropped.
	offset := ReferencesOffset{Id: r.CurrentOffsetId, Len: int32(size)}
	if err := r.Offsets.SetEntry(refId, offset); err != nil {
		return err
	}

	// Advance only once both writes succeeded, so the next call gets a fresh
	// key range in Items.
	r.CurrentOffsetId += Id(size)

	return nil
}
func (r *References) For(docs map[Id]string, refId Id) []SerializedReference {
......@@ -31,8 +71,8 @@ func (r *References) For(docs map[Id]string, refId Id) []SerializedReference {
return nil
}
references, ok := r.Items[refId]
if !ok {
references := r.getItems(refId)
if references == nil {
return nil
}
......@@ -48,3 +88,24 @@ func (r *References) For(docs map[Id]string, refId Id) []SerializedReference {
return serializedReferences
}
// Close closes the Items cache and then the Offsets cache, and hands both
// results to combineErrors to produce the returned error. Both caches are
// closed even if the first Close fails.
func (r *References) Close() error {
	itemsErr := r.Items.Close()
	offsetsErr := r.Offsets.Close()

	return combineErrors(itemsErr, offsetsErr)
}
// getItems returns the items previously stored for refId, or nil when they
// cannot be produced: the offset lookup fails, the recorded length is zero, or
// reading the items themselves fails. Errors are not reported to the caller.
func (r *References) getItems(refId Id) []Item {
	var location ReferencesOffset

	// First hop: find where (and how many) items were stored for refId.
	lookupErr := r.Offsets.Entry(refId, &location)
	switch {
	case lookupErr != nil:
		return nil
	case location.Len == 0:
		return nil
	}

	// Second hop: read exactly location.Len items from the Items cache.
	found := make([]Item, location.Len)
	if readErr := r.Items.Entry(location.Id, &found); readErr != nil {
		return nil
	}

	return found
}
......@@ -12,13 +12,33 @@ func TestReferencesStore(t *testing.T) {
refId = 3
)
r := NewReferences(Config{ProcessReferences: true})
r, err := NewReferences(Config{ProcessReferences: true})
require.NoError(t, err)
r.Store(refId, []Item{{Line: 2, DocId: docId}, {Line: 3, DocId: docId}})
err = r.Store(refId, []Item{{Line: 2, DocId: docId}, {Line: 3, DocId: docId}})
require.NoError(t, err)
docs := map[Id]string{docId: "doc.go"}
serializedReferences := r.For(docs, refId)
require.Contains(t, serializedReferences, SerializedReference{Path: "doc.go#L2"})
require.Contains(t, serializedReferences, SerializedReference{Path: "doc.go#L3"})
require.NoError(t, r.Close())
}
// TestReferencesStoreEmpty checks that storing an empty slice succeeds and
// that For then yields nil for that reference id.
func TestReferencesStoreEmpty(t *testing.T) {
	const refId = 3

	refs, err := NewReferences(Config{ProcessReferences: true})
	require.NoError(t, err)

	require.NoError(t, refs.Store(refId, []Item{}))

	got := refs.For(map[Id]string{1: "doc.go"}, refId)
	require.Nil(t, got)

	require.NoError(t, refs.Close())
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment