Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
ZODB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Kirill Smelkov
ZODB
Commits
1213f8a6
Commit
1213f8a6
authored
Jun 20, 2008
by
Christian Theune
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented new bushy layout for blob directories including backwards
compatibility for the old `lawn` layout.
parent
de31996b
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
351 additions
and
73 deletions
+351
-73
src/CHANGES.txt
src/CHANGES.txt
+5
-0
src/ZEO/ClientStorage.py
src/ZEO/ClientStorage.py
+3
-16
src/ZEO/tests/testZEO.py
src/ZEO/tests/testZEO.py
+8
-11
src/ZODB/blob.py
src/ZODB/blob.py
+165
-39
src/ZODB/tests/blob_layout.txt
src/ZODB/tests/blob_layout.txt
+164
-0
src/ZODB/tests/blob_tempdir.txt
src/ZODB/tests/blob_tempdir.txt
+1
-1
src/ZODB/tests/blob_transaction.txt
src/ZODB/tests/blob_transaction.txt
+3
-3
src/ZODB/tests/testblob.py
src/ZODB/tests/testblob.py
+2
-3
No files found.
src/CHANGES.txt
View file @
1213f8a6
...
@@ -8,6 +8,11 @@ Change History
...
@@ -8,6 +8,11 @@ Change History
New Features
New Features
------------
------------
- Changed layout strategy for the blob directory to a bushy approach (8 levels
deep, at most ~256 entries per directory level, one directory for each
blob). Old directories are automatically detected and will be handled with
the old strategy.
- Versions are no longer supported.
- Versions are no longer supported.
- ZEO cache files can be larger than 4G. Note that older ZEO cache
- ZEO cache files can be larger than 4G. Note that older ZEO cache
...
...
src/ZEO/ClientStorage.py
View file @
1213f8a6
...
@@ -855,9 +855,7 @@ class ClientStorage(object):
...
@@ -855,9 +855,7 @@ class ClientStorage(object):
def
_storeBlob_shared
(
self
,
oid
,
serial
,
data
,
filename
,
txn
):
def
_storeBlob_shared
(
self
,
oid
,
serial
,
data
,
filename
,
txn
):
# First, move the blob into the blob directory
# First, move the blob into the blob directory
dir
=
self
.
fshelper
.
getPathForOID
(
oid
)
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
if
not
os
.
path
.
exists
(
dir
):
os
.
mkdir
(
dir
)
fd
,
target
=
self
.
fshelper
.
blob_mkstemp
(
oid
,
serial
)
fd
,
target
=
self
.
fshelper
.
blob_mkstemp
(
oid
,
serial
)
os
.
close
(
fd
)
os
.
close
(
fd
)
...
@@ -924,14 +922,7 @@ class ClientStorage(object):
...
@@ -924,14 +922,7 @@ class ClientStorage(object):
raise
POSException
.
POSKeyError
(
"No blob file"
,
oid
,
serial
)
raise
POSException
.
POSKeyError
(
"No blob file"
,
oid
,
serial
)
# First, we'll create the directory for this oid, if it doesn't exist.
# First, we'll create the directory for this oid, if it doesn't exist.
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
)
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
if
not
os
.
path
.
exists
(
targetpath
):
try
:
os
.
makedirs
(
targetpath
,
0700
)
except
OSError
:
# We might have lost a race. If so, the directory
# must exist now
assert
os
.
path
.
exists
(
targetpath
)
# OK, it's not here and we (or someone) needs to get it. We
# OK, it's not here and we (or someone) needs to get it. We
# want to avoid getting it multiple times. We want to avoid
# want to avoid getting it multiple times. We want to avoid
...
@@ -1118,19 +1109,15 @@ class ClientStorage(object):
...
@@ -1118,19 +1109,15 @@ class ClientStorage(object):
assert
s
==
tid
,
(
s
,
tid
)
assert
s
==
tid
,
(
s
,
tid
)
self
.
_cache
.
store
(
oid
,
s
,
None
,
data
)
self
.
_cache
.
store
(
oid
,
s
,
None
,
data
)
if
self
.
fshelper
is
not
None
:
if
self
.
fshelper
is
not
None
:
blobs
=
self
.
_tbuf
.
blobs
blobs
=
self
.
_tbuf
.
blobs
while
blobs
:
while
blobs
:
oid
,
blobfilename
=
blobs
.
pop
()
oid
,
blobfilename
=
blobs
.
pop
()
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
)
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
if
not
os
.
path
.
exists
(
targetpath
):
os
.
makedirs
(
targetpath
,
0700
)
rename_or_copy_blob
(
blobfilename
,
rename_or_copy_blob
(
blobfilename
,
self
.
fshelper
.
getBlobFilename
(
oid
,
tid
),
self
.
fshelper
.
getBlobFilename
(
oid
,
tid
),
)
)
self
.
_tbuf
.
clear
()
self
.
_tbuf
.
clear
()
def
undo
(
self
,
trans_id
,
txn
):
def
undo
(
self
,
trans_id
,
txn
):
...
...
src/ZEO/tests/testZEO.py
View file @
1213f8a6
...
@@ -515,8 +515,7 @@ class CommonBlobTests:
...
@@ -515,8 +515,7 @@ class CommonBlobTests:
self
.
_storage
.
tpc_abort
(
t
)
self
.
_storage
.
tpc_abort
(
t
)
raise
raise
self
.
assert_
(
not
os
.
path
.
exists
(
tfname
))
self
.
assert_
(
not
os
.
path
.
exists
(
tfname
))
filename
=
os
.
path
.
join
(
self
.
blobdir
,
oid_repr
(
oid
),
filename
=
self
.
_storage
.
fshelper
.
getBlobFilename
(
oid
,
revid
)
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
self
.
assert_
(
os
.
path
.
exists
(
filename
))
self
.
assert_
(
os
.
path
.
exists
(
filename
))
self
.
assertEqual
(
somedata
,
open
(
filename
).
read
())
self
.
assertEqual
(
somedata
,
open
(
filename
).
read
())
...
@@ -631,16 +630,14 @@ class BlobAdaptedFileStorageTests(GenericTests, CommonBlobTests):
...
@@ -631,16 +630,14 @@ class BlobAdaptedFileStorageTests(GenericTests, CommonBlobTests):
d2
=
somedata
.
read
(
8096
)
d2
=
somedata
.
read
(
8096
)
self
.
assertEqual
(
d1
,
d2
)
self
.
assertEqual
(
d1
,
d2
)
# The file should be in the cache ...
# The file should have been copied to the server:
filename
=
self
.
_storage
.
fshelper
.
getBlobFilename
(
oid
,
revid
)
filename
=
os
.
path
.
join
(
self
.
blobdir
,
oid_repr
(
oid
),
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
check_data
(
filename
)
check_data
(
filename
)
#
It should also be in the cache:
#
... and on the server
filename
=
os
.
path
.
join
(
self
.
blob_cache_dir
,
oid_repr
(
oid
),
server_filename
=
filename
.
replace
(
self
.
blob_cache_dir
,
self
.
blobdir
)
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
self
.
assert_
(
server_filename
.
startswith
(
self
.
blobdir
)
)
check_data
(
filename
)
check_data
(
server_
filename
)
# If we remove it from the cache and call loadBlob, it should
# If we remove it from the cache and call loadBlob, it should
# come back. We can do this in many threads. We'll instrument
# come back. We can do this in many threads. We'll instrument
...
...
src/ZODB/blob.py
View file @
1213f8a6
This diff is collapsed.
Click to expand it.
src/ZODB/tests/blob_layout.txt
0 → 100644
View file @
1213f8a6
======================
Blob directory layouts
======================
The internal structure of the blob directories is governed by so-called
`layouts`. The current default layout is called `bushy`.
The original blob implementation used a layout that we now call `lawn` and
which is still available for backwards compatibility.
Layouts implement two methods: one for computing a relative path for an
OID and one for turning a relative path back into an OID.
Our terminology is roughly the same as used in `DirectoryStorage`.
The `bushy` layout
==================
The bushy layout splits the OID into its 8 individual bytes, reverses their
order and creates one directory level for each byte, named by the hexlified
representation of the byte value. This results in 8 levels of directories, the
leaf directories being used for the revisions of the blobs and at most 256
entries per directory level:
>>> from ZODB.blob import BushyLayout
>>> bushy = BushyLayout()
>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
'0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
'0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
>>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\x01'
Paths that do not represent an OID will cause a ValueError:
>>> bushy.path_to_oid('tmp')
Traceback (most recent call last):
ValueError: Not a valid OID path: tmp
The `lawn` layout
=================
The lawn layout creates one directory for each blob, named by the hex
representation of the blob's OID. This has some limitations on various file systems
like performance penalties or the inability to store more than a given number
of blobs at the same time (e.g. 32k on ext3).
>>> from ZODB.blob import LawnLayout
>>> lawn = LawnLayout()
>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
'0x00'
>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
'0x01'
>>> lawn.path_to_oid('0x01')
'\x00\x00\x00\x00\x00\x00\x00\x01'
Paths that do not represent an OID will cause a ValueError:
>>> lawn.path_to_oid('tmp')
Traceback (most recent call last):
ValueError: Not a valid OID path: tmp
Auto-detecting the layout of a directory
========================================
To allow easier migration, we provide an auto-detection feature that analyses a
blob directory and decides which layout to use. In general it prefers the
`bushy` layout, unless it determines that the directory has already been used
to create a lawn structure.
>>> from ZODB.blob import auto_layout_select
1. Non-existing directories will trigger a bushy layout:
>>> import tempfile
>>> import shutil
>>> d = tempfile.mkdtemp()
>>> shutil.rmtree(d)
>>> auto_layout_select(d)
'bushy'
2. Empty directories will trigger a bushy layout too:
>>> d = tempfile.mkdtemp()
>>> auto_layout_select(d)
'bushy'
3. If the directory contains a marker for the strategy it will be used:
>>> from ZODB.blob import LAYOUT_MARKER
>>> import os.path
>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('bushy')
>>> auto_layout_select(d)
'bushy'
>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('lawn')
>>> auto_layout_select(d)
'lawn'
>>> shutil.rmtree(d)
4. If the directory does not contain a marker but does contain other files, we
assume that it was created with an earlier version of the blob implementation
and use the `lawn` layout:
>>> d = tempfile.mkdtemp()
>>> open(os.path.join(d, '0x0101'), 'wb').write('foo')
>>> auto_layout_select(d)
'lawn'
>>> shutil.rmtree(d)
Directory layout markers
========================
When the file system helper (FSH) is asked to create the directory structure,
it will leave a marker with the chosen layout if no marker exists yet:
>>> from ZODB.blob import FilesystemHelper
>>> d = tempfile.mkdtemp()
>>> blobs = os.path.join(d, 'blobs')
>>> fsh = FilesystemHelper(blobs)
>>> fsh.layout_name
'bushy'
>>> fsh.create()
>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
'bushy'
If the FSH finds a marker, then it verifies whether its content matches the
strategy that was chosen. It will raise an exception if we try to work with a
directory that has a different marker than the chosen strategy:
>>> fsh = FilesystemHelper(blobs, 'lawn')
>>> fsh.layout_name
'lawn'
>>> fsh.create() # doctest: +ELLIPSIS
Traceback (most recent call last):
ValueError: Directory layout `lawn` selected for blob directory /.../blobs/, but marker found for layout `bushy`
>>> shutil.rmtree(blobs)
This feature interacts with the automatic detection in such a way that an
unmarked directory is marked the first time its layout is auto-detected, and
the marker is used from then on:
>>> import ZODB.FileStorage
>>> from ZODB.blob import BlobStorage
>>> datafs = os.path.join(d, 'data.fs')
>>> base_storage = ZODB.FileStorage.FileStorage(datafs)
>>> os.mkdir(blobs)
>>> open(os.path.join(blobs, 'foo'), 'wb').write('foo')
>>> blob_storage = BlobStorage(blobs, base_storage)
>>> blob_storage.fshelper.layout_name
'lawn'
>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
'lawn'
>>> blob_storage = BlobStorage(blobs, base_storage, layout='bushy') # doctest: +ELLIPSIS
Traceback (most recent call last):
ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, but marker found for layout `lawn`
>>> shutil.rmtree(d)
src/ZODB/tests/blob_tempdir.txt
View file @
1213f8a6
...
@@ -32,7 +32,7 @@ First, we need a datatabase with blob support::
...
@@ -32,7 +32,7 @@ First, we need a datatabase with blob support::
>>> from ZODB.DB import DB
>>> from ZODB.DB import DB
>>> from tempfile import mkdtemp
>>> from tempfile import mkdtemp
>>> import os.path
>>> import os.path
>>> base_storage = MappingStorage(
"test"
)
>>> base_storage = MappingStorage(
'test'
)
>>> blob_dir = mkdtemp()
>>> blob_dir = mkdtemp()
>>> blob_storage = BlobStorage(blob_dir, base_storage)
>>> blob_storage = BlobStorage(blob_dir, base_storage)
>>> database = DB(blob_storage)
>>> database = DB(blob_storage)
...
...
src/ZODB/tests/blob_transaction.txt
View file @
1213f8a6
...
@@ -322,9 +322,9 @@ clean up dirty files:
...
@@ -322,9 +322,9 @@ clean up dirty files:
>>> base_storage = DummyBaseStorage()
>>> base_storage = DummyBaseStorage()
>>> blob_dir2 = mkdtemp()
>>> blob_dir2 = mkdtemp()
>>> blob_storage2 = BlobStorage(blob_dir2, base_storage)
>>> blob_storage2 = BlobStorage(blob_dir2, base_storage)
>>> committed_blob_dir =
os.path.join(blob_dir2, '0'
)
>>> committed_blob_dir =
blob_storage2.fshelper.getPathForOID(0
)
>>>
committed_blob_file = os.path.join(committed_blob_dir, '0.blob'
)
>>>
os.makedirs(committed_blob_dir
)
>>>
os.mkdir(committed_blob_dir
)
>>>
committed_blob_file = blob_storage2.fshelper.getBlobFilename(0, 0
)
>>> open(os.path.join(committed_blob_file), 'w').write('foo')
>>> open(os.path.join(committed_blob_file), 'w').write('foo')
>>> os.path.exists(committed_blob_file)
>>> os.path.exists(committed_blob_file)
True
True
...
...
src/ZODB/tests/testblob.py
View file @
1213f8a6
...
@@ -105,7 +105,6 @@ class BlobUndoTests(unittest.TestCase):
...
@@ -105,7 +105,6 @@ class BlobUndoTests(unittest.TestCase):
self
.
here
=
os
.
getcwd
()
self
.
here
=
os
.
getcwd
()
os
.
chdir
(
self
.
test_dir
)
os
.
chdir
(
self
.
test_dir
)
self
.
storagefile
=
'Data.fs'
self
.
storagefile
=
'Data.fs'
os
.
mkdir
(
'blobs'
)
self
.
blob_dir
=
'blobs'
self
.
blob_dir
=
'blobs'
def
tearDown
(
self
):
def
tearDown
(
self
):
...
@@ -483,7 +482,7 @@ def loadblob_tmpstore():
...
@@ -483,7 +482,7 @@ def loadblob_tmpstore():
We can access the blob correctly:
We can access the blob correctly:
>>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
>>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
'.../0x01/0x...blob'
'.../0x01/0x
00/0x00/0x00/0x00/0x00/0x00/0x00/0x
...blob'
Clean up:
Clean up:
...
@@ -503,7 +502,7 @@ def test_suite():
...
@@ -503,7 +502,7 @@ def test_suite():
suite
.
addTest
(
doctest
.
DocFileSuite
(
suite
.
addTest
(
doctest
.
DocFileSuite
(
"blob_basic.txt"
,
"blob_connection.txt"
,
"blob_transaction.txt"
,
"blob_basic.txt"
,
"blob_connection.txt"
,
"blob_transaction.txt"
,
"blob_packing.txt"
,
"blob_importexport.txt"
,
"blob_consume.txt"
,
"blob_packing.txt"
,
"blob_importexport.txt"
,
"blob_consume.txt"
,
"blob_tempdir.txt"
,
"blob_tempdir.txt"
,
"blob_layout.txt"
,
setUp
=
ZODB
.
tests
.
util
.
setUp
,
setUp
=
ZODB
.
tests
.
util
.
setUp
,
tearDown
=
ZODB
.
tests
.
util
.
tearDown
,
tearDown
=
ZODB
.
tests
.
util
.
tearDown
,
))
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment