Commit 2115c90a authored by Jim Fulton, committed by GitHub

Avoid seeking on import (#188)

* Don't seek on imports

(other than the possible seek for custom importers)

We were seeking to handle blob markers. This has two major drawbacks:

1. It wasn't possible to use a non-seekable file.  A use case for
   export/import is to copy database data.  An intermediate file, and
   associated I/O, could be avoided using a pipe, but pipes aren't
   seekable.

2. Seeks cause file-buffer data to be discarded, making IO far more
   expensive.

We didn't really need blob markers, because the preceding blob data
records serve as markers.  (Now we're stuck with them for backward
compatibility.)

* Make cp's buffer size larger and configurable.

* Use the storage temporary directory when importing blobs

To avoid an extra copy.

Also, allow the copy (cp) buffer size to be overridden on export.

(I see no obvious way to plumb it on import. :( )

* Oops, need to use a binary literal (Python 3)

* Respond to PR comments
parent de1f24ca
...@@ -31,7 +31,7 @@ logger = logging.getLogger('ZODB.ExportImport') ...@@ -31,7 +31,7 @@ logger = logging.getLogger('ZODB.ExportImport')
class ExportImport(object): class ExportImport(object):
def exportFile(self, oid, f=None): def exportFile(self, oid, f=None, bufsize=64 * 1024):
if f is None: if f is None:
f = TemporaryFile(prefix="EXP") f = TemporaryFile(prefix="EXP")
elif isinstance(f, six.string_types): elif isinstance(f, six.string_types):
...@@ -64,7 +64,7 @@ class ExportImport(object): ...@@ -64,7 +64,7 @@ class ExportImport(object):
f.write(blob_begin_marker) f.write(blob_begin_marker)
f.write(p64(os.stat(blobfilename).st_size)) f.write(p64(os.stat(blobfilename).st_size))
blobdata = open(blobfilename, "rb") blobdata = open(blobfilename, "rb")
cp(blobdata, f) cp(blobdata, f, bufsize=bufsize)
blobdata.close() blobdata.close()
f.write(export_end_marker) f.write(export_end_marker)
...@@ -158,18 +158,23 @@ class ExportImport(object): ...@@ -158,18 +158,23 @@ class ExportImport(object):
oids[ooid] = oid = self._storage.new_oid() oids[ooid] = oid = self._storage.new_oid()
return_oid_list.append(oid) return_oid_list.append(oid)
if (b'blob' in data and
isinstance(self._reader.getGhost(data), Blob)
):
# Blob support # Blob support
blob_begin = f.read(len(blob_begin_marker))
if blob_begin == blob_begin_marker: # Make sure we have a (redundant, overly) blob marker.
if f.read(len(blob_begin_marker)) != blob_begin_marker:
raise ValueError("No data for blob object")
# Copy the blob data to a temporary file # Copy the blob data to a temporary file
# and remember the name # and remember the name
blob_len = u64(f.read(8)) blob_len = u64(f.read(8))
blob_filename = mktemp() blob_filename = mktemp(self._storage.temporaryDirectory())
blob_file = open(blob_filename, "wb") blob_file = open(blob_filename, "wb")
cp(f, blob_file, blob_len) cp(f, blob_file, blob_len)
blob_file.close() blob_file.close()
else: else:
f.seek(-len(blob_begin_marker),1)
blob_filename = None blob_filename = None
pfile = BytesIO(data) pfile = BytesIO(data)
......
...@@ -95,7 +95,7 @@ def u64(v): ...@@ -95,7 +95,7 @@ def u64(v):
U64 = u64 U64 = u64
def cp(f1, f2, length=None): def cp(f1, f2, length=None, bufsize=64 * 1024):
"""Copy all data from one file to another. """Copy all data from one file to another.
It copies the data from the current position of the input file (f1) It copies the data from the current position of the input file (f1)
...@@ -106,7 +106,7 @@ def cp(f1, f2, length=None): ...@@ -106,7 +106,7 @@ def cp(f1, f2, length=None):
""" """
read = f1.read read = f1.read
write = f2.write write = f2.write
n = 8192 n = bufsize
if length is None: if length is None:
old_pos = f1.tell() old_pos = f1.tell()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment