Commit 9d15654f authored by Michael Droettboom, committed by GitHub

Merge pull request #170 from mdboom/lz4-c

WIP: Use LZ4 filesystem compression, using the canonical C implementation
parents 065c572a 4adce231
......@@ -21,3 +21,4 @@ ccache
/emsdk/emsdk
/six/six-1.11.0
/lz4/lz4-1.8.3
......@@ -6,6 +6,8 @@ FILEPACKAGER=$(PYODIDE_ROOT)/tools/file_packager.py
CPYTHONROOT=cpython
CPYTHONLIB=$(CPYTHONROOT)/installs/python-$(PYVERSION)/lib/python$(PYMINOR)
LZ4LIB=lz4/lz4-1.8.3/lib/liblz4.a
CC=emcc
CXX=em++
OPTFLAGS=-O3
......@@ -20,6 +22,7 @@ LDFLAGS=\
-O3 \
-s MODULARIZE=1 \
$(CPYTHONROOT)/installs/python-$(PYVERSION)/lib/libpython$(PYMINOR).a \
lz4/lz4-1.8.3/lib/liblz4.a \
-s "BINARYEN_METHOD='native-wasm'" \
-s TOTAL_MEMORY=1073741824 \
-s ALLOW_MEMORY_GROWTH=1 \
......@@ -34,7 +37,8 @@ LDFLAGS=\
-std=c++14 \
-lstdc++ \
--memory-init-file 0 \
-s TEXTDECODER=0
-s TEXTDECODER=0 \
-s LZ4=1
SIX_ROOT=six/six-1.11.0/build/lib
SIX_LIBS=$(SIX_ROOT)/six.py
......@@ -69,7 +73,7 @@ build/pyodide.asm.js: src/main.bc src/jsimport.bc src/jsproxy.bc src/js2python.b
build/pyodide.asm.data: root/.built
( \
cd build; \
python $(FILEPACKAGER) pyodide.asm.data --preload ../root/lib@lib --js-output=pyodide.asm.data.js --use-preload-plugins \
python $(FILEPACKAGER) pyodide.asm.data --lz4 --preload ../root/lib@lib --js-output=pyodide.asm.data.js --use-preload-plugins \
)
uglifyjs build/pyodide.asm.data.js -o build/pyodide.asm.data.js
......@@ -133,7 +137,7 @@ clean:
echo "The Emsdk and CPython are not cleaned. cd into those directories to do so."
%.bc: %.c $(CPYTHONLIB)
%.bc: %.c $(CPYTHONLIB) $(LZ4LIB)
$(CC) -o $@ -c $< $(CFLAGS)
......@@ -144,7 +148,7 @@ build/test.data: $(CPYTHONLIB)
)
( \
cd build; \
python $(FILEPACKAGER) test.data --preload ../$(CPYTHONLIB)/test@/lib/python3.7/test --js-output=test.js --export-name=pyodide._module --exclude \*.wasm.pre --exclude __pycache__ \
python $(FILEPACKAGER) test.data --lz4 --preload ../$(CPYTHONLIB)/test@/lib/python3.7/test --js-output=test.js --export-name=pyodide._module --exclude \*.wasm.pre --exclude __pycache__ \
)
uglifyjs build/test.js -o build/test.js
......@@ -202,6 +206,10 @@ $(CPYTHONLIB): emsdk/emsdk/.complete ccache/emcc ccache/em++
make -C $(CPYTHONROOT)
$(LZ4LIB):
make -C lz4
$(SIX_LIBS): $(CPYTHONLIB)
make -C six
......
all: emsdk/.complete
# We hack the CPU_CORES, because if you use all of the cores on Circle-CI, you
# run out of memory
# run out of memory.
emsdk/.complete:
if [ -d emsdk ]; then rm -rf emsdk; fi
......
diff --git a/emsdk/emscripten/tag-1.38.10/src/library_lz4.js b/emsdk/emscripten/tag-1.38.10/src/library_lz4.js
index 4c3f583b7..5291002a4 100644
--- a/src/library_lz4.js
+++ b/src/library_lz4.js
@@ -5,26 +5,14 @@ mergeInto(LibraryManager.library, {
DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */,
FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */,
CHUNK_SIZE: -1,
- codec: null,
init: function() {
- if (LZ4.codec) return;
- LZ4.codec = (function() {
- {{{ read('mini-lz4.js') }}};
- return MiniLZ4;
- })();
- LZ4.CHUNK_SIZE = LZ4.codec.CHUNK_SIZE;
+ LZ4.CHUNK_SIZE = 2048;
},
loadPackage: function (pack) {
LZ4.init();
var compressedData = pack['compressedData'];
- if (!compressedData) compressedData = LZ4.codec.compressPackage(pack['data']);
+ // if (!compressedData) compressedData = LZ4.codec.compressPackage(pack['data']);
assert(compressedData.cachedIndexes.length === compressedData.cachedChunks.length);
- for (var i = 0; i < compressedData.cachedIndexes.length; i++) {
- compressedData.cachedIndexes[i] = -1;
- compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4.CHUNK_SIZE,
- compressedData.cachedOffset + (i+1)*LZ4.CHUNK_SIZE);
- assert(compressedData.cachedChunks[i].length === LZ4.CHUNK_SIZE);
- }
pack['metadata'].files.forEach(function(file) {
var dir = PATH.dirname(file.filename);
var name = PATH.basename(file.filename);
@@ -36,6 +24,12 @@ mergeInto(LibraryManager.library, {
end: file.end,
});
});
+ compressedData.buf = Module['_malloc'](LZ4.CHUNK_SIZE);
+ for (var i = 0; i < compressedData.cachedIndexes.length; i++) {
+ compressedData.cachedIndexes[i] = -1;
+ compressedData.cachedChunks[i] = Module['_malloc'](LZ4.CHUNK_SIZE);
+ assert(compressedData.cachedChunks[i] !== null)
+ }
},
createNode: function (parent, name, mode, dev, contents, mtime) {
var node = FS.createNode(parent, name, mode);
@@ -112,6 +106,7 @@ mergeInto(LibraryManager.library, {
//console.log('LZ4 read ' + [offset, length, position]);
length = Math.min(length, stream.node.size - position);
if (length <= 0) return 0;
+
var contents = stream.node.contents;
var compressedData = contents.compressedData;
var written = 0;
@@ -122,6 +117,8 @@ mergeInto(LibraryManager.library, {
var chunkIndex = Math.floor(start / LZ4.CHUNK_SIZE);
var compressedStart = compressedData.offsets[chunkIndex];
var compressedSize = compressedData.sizes[chunkIndex];
+ var startInChunk = start % LZ4.CHUNK_SIZE;
+ var endInChunk = Math.min(startInChunk + desired, LZ4.CHUNK_SIZE);
var currChunk;
if (compressedData.successes[chunkIndex]) {
var found = compressedData.cachedIndexes.indexOf(chunkIndex);
@@ -138,18 +135,19 @@ mergeInto(LibraryManager.library, {
Module['decompressedChunks'] = (Module['decompressedChunks'] || 0) + 1;
}
var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize);
- //var t = Date.now();
- var originalSize = LZ4.codec.uncompress(compressed, currChunk);
- //console.log('decompress time: ' + (Date.now() - t));
+ // var t = Date.now();
+ // var originalSize = LZ4.codec.uncompress(compressed, currChunk);
+ Module.HEAPU8.set(compressed, compressedData.buf);
+ var originalSize = Module['_LZ4_decompress_safe'](compressedData.buf, currChunk, compressedSize, LZ4.CHUNK_SIZE);
+ // console.log('decompress time: ' + (Date.now() - t));
if (chunkIndex < compressedData.successes.length-1) assert(originalSize === LZ4.CHUNK_SIZE); // all but the last chunk must be full-size
+ buffer.set(Module.HEAPU8.subarray(currChunk + startInChunk, currChunk + endInChunk), offset + written);
}
- } else {
+ }
+ else {
// uncompressed
- currChunk = compressedData.data.subarray(compressedStart, compressedStart + LZ4.CHUNK_SIZE);
+ buffer.set(compressedData.data.subarray(compressedStart + startInChunk, compressedStart + endInChunk), offset + written);
}
- var startInChunk = start % LZ4.CHUNK_SIZE;
- var endInChunk = Math.min(startInChunk + desired, LZ4.CHUNK_SIZE);
- buffer.set(currChunk.subarray(startInChunk, endInChunk), offset + written);
var currWritten = endInChunk - startInChunk;
written += currWritten;
}
@@ -181,4 +179,3 @@ if (LibraryManager.library['$FS__deps']) {
warn('FS does not seem to be in use (no preloaded files etc.), LZ4 will not do anything');
}
#endif
-
# Downloads the LZ4 source release and builds lib/liblz4.a from it with
# `emmake make`, so the archive can be linked into the main build.
PYODIDE_ROOT=$(abspath ..)
include ../Makefile.envs

LZ4VERSION=1.8.3

ROOT=$(abspath .)

SRC=$(ROOT)/lz4-$(LZ4VERSION)
TARBALL=$(ROOT)/downloads/lz4-$(LZ4VERSION).tgz
URL=https://github.com/lz4/lz4/archive/v$(LZ4VERSION).tar.gz

# `all` and `clean` name actions, not files; a stray file with either name
# must not stop them from running.
.PHONY: all clean

all: $(SRC)/lib/liblz4.a


clean:
	-rm -fr downloads
	-rm -fr $(SRC)


# If the download fails, wget has already created (an empty or partial) $@.
# Remove it so that the next `make` retries the download instead of treating
# the broken tarball as up to date.
# NOTE(review): the checksum verification below is still disabled — confirm
# the recorded md5 matches the GitHub archive before re-enabling it.
$(TARBALL):
	mkdir -p $(ROOT)/downloads
	wget -q -O $@ $(URL) || (rm -f $@; false)
#	md5sum --quiet --check checksums || (rm $@; false)


# The extracted Makefile carries the timestamp stored in the archive; touch it
# so that it is newer than the tarball and the extraction is not repeated.
$(SRC)/Makefile: $(TARBALL)
	tar -C . -xf $(TARBALL)
	touch $(SRC)/Makefile


$(SRC)/lib/liblz4.a: $(SRC)/Makefile
	( \
		cd $(SRC) ; \
		emmake make ; \
	)
d5ce78f7b1b76002bbfffa6f78a5fc4e downloads/lz4-1.8.3.tgz
......@@ -32,6 +32,49 @@ var languagePluginLoader = new Promise((resolve, reject) => {
}
};
// clang-format off
/**
 * Instantiate every shared library (`.so`) found in the Emscripten virtual
 * filesystem and cache the result in `Module['preloadedWasm']`, keyed by the
 * library's absolute path.
 *
 * On Chrome, we have to instantiate wasm asynchronously. Since that can't be
 * done synchronously within the call to dlopen, we instantiate every .so that
 * comes our way up front.
 *
 * Libraries are instantiated one after another (each load is chained onto the
 * previous one). Paths that already have an entry in the cache when the
 * filesystem is walked are skipped.
 *
 * @returns {Promise<void>} Resolves once every discovered library has been
 *     instantiated and cached; rejects if any instantiation fails.
 */
let preloadWasm = () => {
  let promise = Promise.resolve();
  const FS = pyodide._module.FS;

  // Walk `rootpath` (which must end in '/') depth-first, queueing every `.so`.
  function recurseDir(rootpath) {
    let dirs;
    try {
      dirs = FS.readdir(rootpath);
    } catch {
      // Best effort: a directory that cannot be listed is simply skipped.
      return;
    }
    // `entry` must be declared here: an undeclared loop variable leaks a
    // global in sloppy mode and throws a ReferenceError in strict mode.
    for (const entry of dirs) {
      // Skips '.', '..' and any hidden file or directory.
      if (entry.startsWith('.')) {
        continue;
      }
      const path = rootpath + entry;
      if (entry.endsWith('.so')) {
        if (Module['preloadedWasm'][path] === undefined) {
          promise = promise
            .then(() => Module['loadWebAssemblyModule'](
                FS.readFile(path), true))
            .then((module) => {
              Module['preloadedWasm'][path] = module;
            });
        }
      } else if (FS.isDir(FS.lookupPath(path).node.mode)) {
        recurseDir(path + '/');
      }
    }
  }

  recurseDir('/');

  return promise;
};
// clang-format on
let _loadPackage = (names) => {
// DFS to find all dependencies of the requested packages
let packages = window.pyodide._module.packages.dependencies;
......@@ -104,7 +147,11 @@ var languagePluginLoader = new Promise((resolve, reject) => {
}
delete window.pyodide._module.monitorRunDependencies;
const packageList = Array.from(Object.keys(toLoad)).join(', ');
resolve(`Loaded ${packageList}`);
if (!isFirefox) {
preloadWasm().then(() => {resolve(`Loaded ${packageList}`)});
} else {
resolve(`Loaded ${packageList}`);
}
}
};
......@@ -196,11 +243,9 @@ var languagePluginLoader = new Promise((resolve, reject) => {
Module.noImageDecoding = true;
Module.noAudioDecoding = true;
Module.noWasmDecoding = true;
Module.preloadedWasm = {};
let isFirefox = navigator.userAgent.toLowerCase().indexOf('firefox') > -1;
if (isFirefox) {
console.log("Skipping wasm decoding");
Module.noWasmDecoding = true;
}
let wasm_promise = WebAssembly.compileStreaming(fetch(wasmURL));
Module.instantiateWasm = (info, receiveInstance) => {
......
......@@ -126,6 +126,7 @@ def package_files(buildpath, srcpath, pkg, args):
'python',
Path(ROOTDIR) / 'file_packager.py',
name + '.data',
'--lz4',
'--preload',
'{}@/'.format(install_prefix),
'--js-output={}'.format(name + '.js'),
......
......@@ -466,8 +466,8 @@ if has_preloaded:
use_data = '''
var compressedData = %s;
compressedData.data = byteArray;
assert(typeof LZ4 === 'object', 'LZ4 not present - was your app build with -s LZ4=1 ?');
LZ4.loadPackage({ 'metadata': metadata, 'compressedData': compressedData });
assert(typeof Module.LZ4 === 'object', 'LZ4 not present - was your app build with -s LZ4=1 ?');
Module.LZ4.loadPackage({ 'metadata': metadata, 'compressedData': compressedData });
Module['removeRunDependency']('datafile_%s');
''' % (meta, shared.JS.escape_for_js_string(data_target))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment