Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
zodb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Joshua
zodb
Commits
a187da03
Commit
a187da03
authored
Nov 05, 2013
by
Tres Seaver
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #14 from zopefoundation/repozo-verify
This adds 'repozo --verify' to check your backup integrity
parents
70a03207
54b06570
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
288 additions
and
42 deletions
+288
-42
src/ZODB/scripts/repozo.py
src/ZODB/scripts/repozo.py
+138
-22
src/ZODB/scripts/tests/test_repozo.py
src/ZODB/scripts/tests/test_repozo.py
+150
-20
No files found.
src/ZODB/scripts/repozo.py
View file @
a187da03
#!/usr/bin/env python
2.3
#!/usr/bin/env python
# repozo.py -- incremental and full backups of a Data.fs file.
#
...
...
@@ -18,6 +18,9 @@ Where:
-R / --recover
Restore a ZODB file from a backup.
-V / --verify
Verify backup integrity.
-v / --verbose
Verbose mode.
...
...
@@ -69,18 +72,17 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored
automatically.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
"""
from
__future__
import
print_function
import
os
import
shutil
import
sys
from
six.moves
import
filter
try
:
# the hashlib package is available from Python 2.5
from
hashlib
import
md5
except
ImportError
:
# the md5 package is deprecated in Python 2.6
from
md5
import
new
as
md5
from
hashlib
import
md5
import
gzip
import
time
import
errno
...
...
@@ -92,6 +94,7 @@ program = sys.argv[0]
BACKUP
=
1
RECOVER
=
2
VERIFY
=
3
COMMASPACE
=
', '
READCHUNK
=
16
*
1024
...
...
@@ -106,6 +109,18 @@ class NoFiles(Exception):
pass
class _GzipCloser(object):
    """Context manager wrapper around ``gzip.open``.

    Entering the ``with`` block yields the opened gzip file object;
    leaving the block closes it.
    """

    def __init__(self, fqn, mode):
        # The file is opened immediately, so open errors surface here
        # rather than on ``__enter__``.
        self._opened = gzip.open(fqn, mode)

    def __enter__(self):
        return self._opened

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing is returned, so an exception raised inside the
        # ``with`` body keeps propagating after the file is closed.
        stream = self._opened
        stream.close()
def
usage
(
code
,
msg
=
''
):
outfp
=
sys
.
stderr
if
code
==
0
:
...
...
@@ -124,12 +139,17 @@ def log(msg, *args):
print
(
msg
%
args
,
file
=
sys
.
stderr
)
def error(msg, *args):
    """Write an error message to stderr.

    ``msg`` is a %-style format string; ``args`` are interpolated into
    it before the result is printed, followed by a newline.
    """
    text = msg % args
    print(text, file=sys.stderr)
def
parseargs
(
argv
):
global
VERBOSE
try
:
opts
,
args
=
getopt
.
getopt
(
argv
,
'BRvhr:f:FQzkD:o:'
,
opts
,
args
=
getopt
.
getopt
(
argv
,
'BR
V
vhr:f:FQzkD:o:'
,
[
'backup'
,
'recover'
,
'verify'
'verbose'
,
'help'
,
'repository='
,
...
...
@@ -145,7 +165,7 @@ def parseargs(argv):
usage
(
1
,
msg
)
class
Options
:
mode
=
None
# BACKUP
or RECOVER
mode
=
None
# BACKUP
, RECOVER or VERIFY
file
=
None
# name of input Data.fs file
repository
=
None
# name of directory holding backups
full
=
False
# True forces full backup
...
...
@@ -164,12 +184,16 @@ def parseargs(argv):
VERBOSE
=
True
elif
opt
in
(
'-R'
,
'--recover'
):
if
options
.
mode
is
not
None
:
usage
(
1
,
'-B
and -R
are mutually exclusive'
)
usage
(
1
,
'-B
, -R, and -V
are mutually exclusive'
)
options
.
mode
=
RECOVER
elif
opt
in
(
'-B'
,
'--backup'
):
if
options
.
mode
is
not
None
:
usage
(
1
,
'-B
and -R
are mutually exclusive'
)
usage
(
1
,
'-B
, -R, and -V
are mutually exclusive'
)
options
.
mode
=
BACKUP
elif
opt
in
(
'-V'
,
'--verify'
):
if
options
.
mode
is
not
None
:
usage
(
1
,
'-B, -R, and -V are mutually exclusive'
)
options
.
mode
=
VERIFY
elif
opt
in
(
'-Q'
,
'--quick'
):
options
.
quick
=
True
elif
opt
in
(
'-f'
,
'--file'
):
...
...
@@ -195,7 +219,7 @@ def parseargs(argv):
# Sanity checks
if
options
.
mode
is
None
:
usage
(
1
,
'Either --backup
or --recover
is required'
)
usage
(
1
,
'Either --backup
, --recover or --verify
is required'
)
if
options
.
repository
is
None
:
usage
(
1
,
'--repository is required'
)
if
options
.
mode
==
BACKUP
:
...
...
@@ -205,14 +229,33 @@ def parseargs(argv):
if
options
.
output
is
not
None
:
log
(
'--output option is ignored in backup mode'
)
options
.
output
=
None
else
:
assert
options
.
mode
==
RECOVER
elif
options
.
mode
==
RECOVER
:
if
options
.
file
is
not
None
:
log
(
'--file option is ignored in recover mode'
)
options
.
file
=
None
if
options
.
killold
is
not
None
:
if
options
.
killold
:
log
(
'--kill-old-on-full option is ignored in recover mode'
)
options
.
killold
=
None
options
.
killold
=
False
else
:
assert
options
.
mode
==
VERIFY
if
options
.
date
is
not
None
:
log
(
"--date option is ignored in verify mode"
)
options
.
date
=
None
if
options
.
output
is
not
None
:
log
(
'--output option is ignored in verify mode'
)
options
.
output
=
None
if
options
.
full
:
log
(
'--full option is ignored in verify mode'
)
options
.
full
=
False
if
options
.
gzip
:
log
(
'--gzip option is ignored in verify mode'
)
options
.
gzip
=
False
if
options
.
file
is
not
None
:
log
(
'--file option is ignored in verify mode'
)
options
.
file
=
None
if
options
.
killold
:
log
(
'--kill-old-on-full option is ignored in verify mode'
)
options
.
killold
=
False
return
options
...
...
@@ -256,6 +299,22 @@ def checksum(fp, n):
return
sum
.
hexdigest
()
def file_size(fp):
    """Return the number of bytes that can be read from ``fp``.

    The stream is run through ``dofile`` with a callback that throws
    every chunk away; whatever ``dofile`` returns is passed back as is.
    """
    def _discard(data):
        # The content is irrelevant here, only the count matters.
        return None

    return dofile(_discard, fp, None)
def checksum_and_size(fp):
    """Return ``(hexdigest, size)`` for the data readable from ``fp``.

    Each chunk that ``dofile`` hands to the callback is fed into an md5
    digest; the value ``dofile`` returns is reported as the size.
    """
    digest = md5()
    # The bound ``update`` method has the same call shape as a
    # one-argument callback and, like it, returns None.
    nbytes = dofile(digest.update, fp, None)
    return digest.hexdigest(), nbytes
def
copyfile
(
options
,
dst
,
start
,
n
):
# Copy bytes from file src, to file dst, starting at offset start, for n
# length of bytes. For robustness, we first write, flush and fsync
...
...
@@ -608,6 +667,60 @@ def do_recover(options):
log
(
'No index file to restore: %s'
,
source_index
)
def do_verify(options):
    """Verify the sizes and checksums of all files listed in the .dat file.

    The .dat file is located next to the first file returned by
    ``find_files(options)``.  Each non-blank line in it holds four
    whitespace-separated fields: file name, start position, end position
    and checksum.  Only the base name of the recorded file name is used;
    it is looked up inside ``options.repository``.

    Problems are reported through ``error()`` and verification carries
    on with the next file:

    * the file does not exist,
    * the file exists but cannot be read (I/O error, or a truncated or
      corrupt gzip stream),
    * the size differs from ``endpos - startpos``,
    * the checksum differs (skipped when ``options.quick`` is true).

    Raises ``NoFiles`` when ``find_files`` returns nothing.
    """
    # Local import: only needed to recognise corrupt gzip data below.
    import zlib

    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as fp:
        for line in fp:
            if not line.strip():
                # A blank line carries no record; unpacking it would fail.
                continue
            fn, startpos, endpos, expected_sum = line.split()
            expected_size = int(endpos) - int(startpos)
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            log("Verifying %s", filename)
            if filename.endswith('fsz'):
                reader = get_checksum_and_size_of_gzipped_file
                when_uncompressed = ' (when uncompressed)'
            else:
                reader = get_checksum_and_size_of_file
                when_uncompressed = ''
            try:
                actual_sum, size = reader(filename, options.quick)
            except IOError as e:
                # Only a genuinely absent file is "missing"; other I/O
                # failures (permissions, bad gzip header/CRC, ...) get
                # their own message so the operator is not misled.
                if e.errno == errno.ENOENT:
                    error("%s is missing", filename)
                else:
                    error("%s could not be read: %s", filename, e)
                continue
            except (EOFError, zlib.error) as e:
                # The gzip module can raise these for truncated or
                # corrupt files; a damaged backup must be reported, not
                # abort the whole verification run.
                error("%s could not be read: %s", filename, e)
                continue
            if size != expected_size:
                error("%s is %d bytes%s, should be %d bytes", filename,
                      size, when_uncompressed, expected_size)
            elif not options.quick:
                if actual_sum != expected_sum:
                    error("%s has checksum %s%s instead of %s", filename,
                          actual_sum, when_uncompressed, expected_sum)
def get_checksum_and_size_of_gzipped_file(filename, quick):
    """Return ``(checksum, size)`` for the contents of a gzipped file.

    The file is opened for reading through ``_GzipCloser``.  With
    ``quick`` true the checksum slot is ``None`` and only ``file_size``
    is consulted; otherwise ``checksum_and_size`` supplies both values.
    """
    with _GzipCloser(filename, 'rb') as stream:
        if not quick:
            return checksum_and_size(stream)
        return None, file_size(stream)
def get_checksum_and_size_of_file(filename, quick):
    """Return ``(checksum, size)`` for a file read in binary mode.

    The size is the offset reported after seeking to the end of the
    file.  With ``quick`` true the checksum slot is ``None``; otherwise
    the file is rewound and ``checksum`` is called on it with that size.
    """
    with open(filename, 'rb') as stream:
        stream.seek(0, 2)  # whence=2: position relative to end of file
        nbytes = stream.tell()
        if quick:
            return None, nbytes
        stream.seek(0)
        return checksum(stream, nbytes), nbytes
def
main
(
argv
=
None
):
if
argv
is
None
:
argv
=
sys
.
argv
[
1
:]
...
...
@@ -616,15 +729,18 @@ def main(argv=None):
try
:
do_backup
(
options
)
except
WouldOverwriteFiles
as
e
:
print
(
str
(
e
),
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
else
:
assert
options
.
mode
==
RECOVER
sys
.
exit
(
str
(
e
))
elif
options
.
mode
==
RECOVER
:
try
:
do_recover
(
options
)
except
NoFiles
as
e
:
print
(
str
(
e
),
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
sys
.
exit
(
str
(
e
))
else
:
assert
options
.
mode
==
VERIFY
try
:
do_verify
(
options
)
except
NoFiles
as
e
:
sys
.
exit
(
str
(
e
))
if
__name__
==
'__main__'
:
...
...
src/ZODB/scripts/tests/test_repozo.py
View file @
a187da03
...
...
@@ -14,12 +14,7 @@
from
__future__
import
print_function
import
unittest
import
os
try
:
# the hashlib package is available from Python 2.5
from
hashlib
import
md5
except
ImportError
:
# the md5 package is deprecated in Python 2.6
from
md5
import
new
as
md5
from
hashlib
import
md5
import
ZODB.tests.util
# layer used at class scope
...
...
@@ -31,19 +26,6 @@ except ImportError:
_NOISY
=
os
.
environ
.
get
(
'NOISY_REPOZO_TEST_OUTPUT'
)
class _GzipCloser(object):
    """Context manager wrapper around ``gzip.open``.

    Entering the ``with`` block yields the opened gzip file object;
    leaving the block closes it.
    """

    def __init__(self, fqn, mode):
        # gzip is imported here, at call time, rather than at module level.
        import gzip
        self._opened = gzip.open(fqn, mode)

    def __enter__(self):
        return self._opened

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing is returned, so an exception raised inside the
        # ``with`` body keeps propagating after the file is closed.
        stream = self._opened
        stream.close()
def
_write_file
(
name
,
bits
,
mode
=
'wb'
):
with
open
(
name
,
mode
)
as
f
:
f
.
write
(
bits
)
...
...
@@ -198,9 +180,10 @@ class OptionsTestBase:
def _makeOptions(self, **kw):
    """Create a fresh temporary repository and return an options object.

    A new directory (prefix ``test-repozo-``) is created and remembered
    as ``self._repository_directory``.  The returned object exposes it as
    ``repository``, has ``date`` set to ``None``, and carries every
    keyword argument passed in as an instance attribute.
    """
    import tempfile
    # Create exactly one directory per call: assigning an unprefixed
    # mkdtemp() first and then overwriting it would leave a stray empty
    # directory behind that nothing refers to any more.
    self._repository_directory = tempfile.mkdtemp(prefix='test-repozo-')

    class Options(object):
        repository = self._repository_directory
        date = None

        def __init__(self, **kw):
            self.__dict__.update(kw)

    return Options(**kw)
...
...
@@ -222,6 +205,7 @@ class Test_copyfile(OptionsTestBase, unittest.TestCase):
self
.
assertEqual
(
_read_file
(
target
),
b'x'
*
100
)
def
test_w_gzip
(
self
):
from
ZODB.scripts.repozo
import
_GzipCloser
options
=
self
.
_makeOptions
(
gzip
=
True
)
source
=
options
.
file
=
os
.
path
.
join
(
self
.
_repository_directory
,
'source.txt'
)
...
...
@@ -240,6 +224,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
return
concat
(
files
,
ofp
)
def
_makeFile
(
self
,
name
,
text
,
gzip_file
=
False
):
from
ZODB.scripts.repozo
import
_GzipCloser
import
tempfile
if
self
.
_repository_directory
is
None
:
self
.
_repository_directory
=
tempfile
.
mkdtemp
()
...
...
@@ -789,6 +774,150 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self
.
assertEqual
(
_read_file
(
output
),
b'AAABBB'
)
self
.
assertEqual
(
_read_file
(
index
),
b'CCC'
)
class Test_do_verify(OptionsTestBase, unittest.TestCase):
    """Tests for ``repozo.do_verify``.

    Every scenario except ``test_no_files`` uses the same tiny
    repository: a full backup holding ``AAA``, optionally a delta backup,
    and a .dat index naming both.  ``_check`` builds that repository and
    compares the messages ``do_verify`` passes to ``repozo.error``.
    """

    # Checksums the passing scenarios record for the 'AAA' and 'BBBB'
    # payloads respectively.
    _FULL_SUM = 'e1faffb3e614e6c2fba74296962386b7'
    _DELTA_SUM = 'f50881ced34c7d9e6bce100bf33dec60'

    def _callFUT(self, options):
        """Run ``do_verify`` and return the list of reported errors."""
        from ZODB.scripts import repozo
        collected = []

        def _record(msg, *args):
            collected.append(msg % args)

        saved = repozo.error
        repozo.error = _record
        try:
            repozo.do_verify(options)
        finally:
            # Always restore the real reporter, even if do_verify raised.
            repozo.error = saved
        return collected

    def _makeFile(self, hour, min, sec, ext, text=None):
        """Create a repository file named after the given time and ext.

        Files whose extension ends in ``fsz`` are written through
        ``_GzipCloser``; everything else is written as a plain file.
        When ``text`` is omitted the file name itself is the content.
        Returns the full path of the file.
        """
        from ZODB.scripts.repozo import _GzipCloser
        assert self._repository_directory, 'call _makeOptions first!'
        name = '2010-05-14-%02d-%02d-%02d%s' % (hour, min, sec, ext)
        payload = name if text is None else text
        fqn = os.path.join(self._repository_directory, name)
        _opener = _GzipCloser if ext.endswith('fsz') else open
        with _opener(fqn, 'wb') as f:
            f.write(payload.encode())
            f.flush()
        return fqn

    def _check(self, quick, z, delta_text, problem=None, sums=None):
        """Build the standard repository and assert on reported errors.

        ``z`` is ``''`` for plain files or ``'z'`` for gzipped ones.
        ``delta_text`` is the delta backup's content, or ``None`` to
        leave the delta file out.  ``problem`` is the expected message
        tail for the delta file, or ``None`` when no error is expected.
        ``sums`` overrides the two checksums written to the .dat file.
        """
        options = self._makeOptions(quick=quick)
        self._makeFile(2, 3, 4, '.fs' + z, 'AAA')
        if delta_text is not None:
            self._makeFile(4, 5, 6, '.deltafs' + z, delta_text)
        if sums is None:
            sums = (self._FULL_SUM, self._DELTA_SUM)
        full_sum, delta_sum = sums
        index = ('/backup/2010-05-14-02-03-04.fs%s 0 3 %s\n'
                 '/backup/2010-05-14-04-05-06.deltafs%s 3 7 %s\n')
        self._makeFile(2, 3, 4, '.dat',
                       index % (z, full_sum, z, delta_sum))
        expected = []
        if problem is not None:
            expected.append(options.repository + os.path.sep +
                            '2010-05-14-04-05-06.deltafs' + z + problem)
        self.assertEqual(self._callFUT(options), expected)

    def test_no_files(self):
        from ZODB.scripts.repozo import NoFiles
        options = self._makeOptions()
        self.assertRaises(NoFiles, self._callFUT, options)

    def test_all_is_fine(self):
        self._check(False, '', 'BBBB')

    def test_all_is_fine_gzip(self):
        self._check(False, 'z', 'BBBB')

    def test_missing_file(self):
        self._check(True, '', None, ' is missing')

    def test_missing_file_gzip(self):
        self._check(True, 'z', None, ' is missing')

    def test_bad_size(self):
        self._check(False, '', 'BBB',
                    ' is 3 bytes, should be 4 bytes')

    def test_bad_size_gzip(self):
        self._check(False, 'z', 'BBB',
                    ' is 3 bytes (when uncompressed), should be 4 bytes')

    def test_bad_checksum(self):
        self._check(False, '', 'BbBB',
                    ' has checksum 36486440db255f0ee6ab109d5d231406'
                    ' instead of f50881ced34c7d9e6bce100bf33dec60')

    def test_bad_checksum_gzip(self):
        self._check(False, 'z', 'BbBB',
                    ' has checksum 36486440db255f0ee6ab109d5d231406'
                    ' (when uncompressed) instead of'
                    ' f50881ced34c7d9e6bce100bf33dec60')

    def test_quick_ignores_checksums(self):
        self._check(True, '', 'BBBB',
                    sums=('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
                          'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'))

    def test_quick_ignores_checksums_gzip(self):
        self._check(True, 'z', 'BBBB',
                    sums=('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
                          'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'))
class
MonteCarloTests
(
unittest
.
TestCase
):
layer
=
ZODB
.
tests
.
util
.
MininalTestLayer
(
'repozo'
)
...
...
@@ -902,6 +1031,7 @@ def test_suite():
unittest
.
makeSuite
(
Test_do_incremental_backup
),
#unittest.makeSuite(Test_do_backup), #TODO
unittest
.
makeSuite
(
Test_do_recover
),
unittest
.
makeSuite
(
Test_do_verify
),
# N.B.: this test takes forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic.
unittest
.
makeSuite
(
MonteCarloTests
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment