Commit 16c07bf4 authored by bescoto

Final changes for 0.11.4


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@301 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 213a7c33
New in v0.11.4 (2003/04/01)
New in v0.11.4 (2003/03/15)
---------------------------
Fixed bug incrementing sockets whose filenames were pretty long, but
......@@ -7,6 +7,14 @@ not super long. Reported by Olivier Mueller.
Added Albert Chin-A-Young's patch to add a few options to the setup.py
install script.
Apparently fixed rare utime type bug. Thanks to Christian Skarby for
report and testing.
Added detailed file_statistics (in addition to session_statistics) as
requested by Dean Gaudet. Disable with --no-file-statistics option.
Minor speed enhancements.
New in v0.11.3 (2003/03/04)
---------------------------
......
---------[ Medium term ]---------------------------------------
Look at Kent Borg's suggestion for restore options and digests.
......@@ -13,8 +14,6 @@ Add # of increments option to --remove-older-than
Make argument shortcut for cstream
Make --calculate-averages work with directory_statistics file.
Write configuration file, to make sure settings like --quoting-char,
--windows-time-format, etc., don't change between sessions,
backup/restoring, etc.
......
......@@ -218,6 +218,11 @@ Do not compress increments based on files whose filenames match regexp.
The default is
"(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
.TP
.B --no-file-statistics
This will disable writing to the file_statistics file in the
rdiff-backup-data directory. rdiff-backup will run slightly quicker
and take up a bit less space.
.TP
.BI --no-hard-links
Don't replicate hard links on destination side. Note that because
metadata is written to a separate file, hard link information will not
......
......@@ -148,6 +148,10 @@ ssh_compression = 1
# If true, print statistics after successful backup
print_statistics = None
# Controls whether file_statistics file is written in
# rdiff-backup-data dir. These can sometimes take up a lot of space.
file_statistics = 1
# On the writer connection, the following will be set to the mirror
# Select iterator.
select_mirror = None
......
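For orientation, here is a minimal sketch of how such a module-level setting is meant to be used: the option handler later in this diff calls Globals.set('file_statistics', 0) for --no-file-statistics, and the backup code checks Globals.file_statistics before opening the statistics file. The set() helper below is an illustrative stand-in only (the real Globals module does more bookkeeping).

# Illustrative stand-in for the Globals settings module (sketch only).
file_statistics = 1      # write the file_statistics file by default
print_statistics = None  # print session statistics after a successful backup

def set(name, val):
    # Assign a module-level setting by name, as Globals.set("...", ...) does.
    globals()[name] = val

if __name__ == "__main__":
    set("file_statistics", 0)   # effect of --no-file-statistics
    print(file_statistics)      # -> 0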
......@@ -53,13 +53,14 @@ def parse_cmdlineoptions(arglist):
"include-globbing-filelist=", "include-regexp=",
"list-changed-since=", "list-increments",
"no-compare-inode", "no-compression",
"no-compression-regexp=", "no-hard-links", "null-separator",
"parsable-output", "print-statistics", "quoting-char=",
"remote-cmd=", "remote-schema=", "remove-older-than=",
"restore-as-of=", "restrict=", "restrict-read-only=",
"restrict-update-only=", "server", "ssh-no-compression",
"terminal-verbosity=", "test-server", "verbosity=",
"version", "windows-mode", "windows-time-format"])
"no-compression-regexp=", "no-file-statistics",
"no-hard-links", "null-separator", "parsable-output",
"print-statistics", "quoting-char=", "remote-cmd=",
"remote-schema=", "remove-older-than=", "restore-as-of=",
"restrict=", "restrict-read-only=", "restrict-update-only=",
"server", "ssh-no-compression", "terminal-verbosity=",
"test-server", "verbosity=", "version", "windows-mode",
"windows-time-format"])
except getopt.error, e:
commandline_error("Bad commandline options: %s" % str(e))
......@@ -108,6 +109,7 @@ def parse_cmdlineoptions(arglist):
elif opt == "--no-compression": Globals.set("compression", None)
elif opt == "--no-compression-regexp":
Globals.set("no_compression_regexp_string", arg)
elif opt == "--no-file-statistics": Globals.set('file_statistics', 0)
elif opt == "--no-hard-links": Globals.set('preserve_hardlinks', 0)
elif opt == "--null-separator": Globals.set("null_separator", 1)
elif opt == "--parsable-output": Globals.set('parsable_output', 1)
......
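The hunk above threads the new --no-file-statistics long option through getopt and maps it onto the corresponding Globals flag. A self-contained sketch of that pattern, written in modern Python syntax with a trimmed-down option list (a settings dict standing in for the Globals module), might look like this:

import getopt
import sys

def parse_cmdline(arglist):
    # Defaults mirroring the flags touched by the options shown above.
    settings = {"file_statistics": 1, "preserve_hardlinks": 1, "verbosity": 3}
    try:
        opts, args = getopt.getopt(arglist, "v:",
                                   ["no-file-statistics", "no-hard-links",
                                    "verbosity="])
    except getopt.GetoptError as e:
        sys.exit("Bad commandline options: %s" % e)
    for opt, arg in opts:
        if opt == "--no-file-statistics":
            settings["file_statistics"] = 0
        elif opt == "--no-hard-links":
            settings["preserve_hardlinks"] = 0
        elif opt in ("-v", "--verbosity"):
            settings["verbosity"] = int(arg)
    return settings, args

if __name__ == "__main__":
    print(parse_cmdline(["--no-file-statistics", "-v", "5", "src", "dest"]))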
......@@ -237,14 +237,21 @@ class CacheCollatedPostProcess:
self.iter = collated_iter # generates (source_rorp, dest_rorp) pairs
self.cache_size = cache_size
self.statfileobj = statistics.init_statfileobj()
if Globals.file_statistics: statistics.FileStats.init()
metadata.OpenMetadata()
# the following should map indicies to lists [source_rorp,
# dest_rorp, changed_flag, success_flag] where changed_flag
# should be true if the rorps are different, and success_flag
# should be 1 if dest_rorp has been successfully updated to
# source_rorp, and 2 if the destination file is deleted
# entirely. They both default to false (0).
# the following should map indicies to lists
# [source_rorp, dest_rorp, changed_flag, success_flag, increment]
# changed_flag should be true if the rorps are different, and
# success_flag should be 1 if dest_rorp has been successfully
# updated to source_rorp, and 2 if the destination file is
# deleted entirely. They both default to false (0).
# increment holds the RPath of the increment file if one
# exists. It is used to record file statistics.
self.cache_dict = {}
self.cache_indicies = []
......@@ -255,7 +262,7 @@ class CacheCollatedPostProcess:
source_rorp, dest_rorp = self.iter.next()
self.pre_process(source_rorp, dest_rorp)
index = source_rorp and source_rorp.index or dest_rorp.index
self.cache_dict[index] = [source_rorp, dest_rorp, 0, 0]
self.cache_dict[index] = [source_rorp, dest_rorp, 0, 0, None]
self.cache_indicies.append(index)
if len(self.cache_indicies) > self.cache_size: self.shorten_cache()
......@@ -276,15 +283,17 @@ class CacheCollatedPostProcess:
"""Remove one element from cache, possibly adding it to metadata"""
first_index = self.cache_indicies[0]
del self.cache_indicies[0]
old_source_rorp, old_dest_rorp, changed_flag, success_flag = \
old_source_rorp, old_dest_rorp, changed_flag, success_flag, inc = \
self.cache_dict[first_index]
del self.cache_dict[first_index]
self.post_process(old_source_rorp, old_dest_rorp,
changed_flag, success_flag)
changed_flag, success_flag, inc)
def post_process(self, source_rorp, dest_rorp, changed, success):
def post_process(self, source_rorp, dest_rorp, changed, success, inc):
"""Post process source_rorp and dest_rorp.
The point of this is to write statistics and metadata.
changed will be true if the files have changed. success will
be true if the files have been successfully updated (this is
always false for un-changed files).
......@@ -294,12 +303,14 @@ class CacheCollatedPostProcess:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
if dest_rorp: self.statfileobj.add_dest_file(dest_rorp)
if success == 0: metadata_rorp = dest_rorp
elif success == 1:
elif success == 1 or success == 2:
self.statfileobj.add_changed(source_rorp, dest_rorp)
metadata_rorp = source_rorp
else: metadata_rorp = None
if metadata_rorp and metadata_rorp.lstat():
metadata.WriteMetadata(metadata_rorp)
if Globals.file_statistics:
statistics.FileStats.update(source_rorp, dest_rorp, changed, inc)
def in_cache(self, index):
"""Return true if given index is cached"""
......@@ -317,6 +328,10 @@ class CacheCollatedPostProcess:
"""Signal that the file with given index has changed"""
self.cache_dict[index][2] = 1
def set_inc(self, index, inc):
"""Set the increment of the current file"""
self.cache_dict[index][4] = inc
def get_rorps(self, index):
"""Retrieve (source_rorp, dest_rorp) from cache"""
return self.cache_dict[index][:2]
......@@ -337,6 +352,7 @@ class CacheCollatedPostProcess:
while self.cache_indicies: self.shorten_cache()
metadata.CloseMetadata()
if Globals.print_statistics: statistics.print_active_stats()
if Globals.file_statistics: statistics.FileStats.close()
statistics.write_active_statfileobj()
......@@ -511,6 +527,7 @@ class IncrementITRB(PatchITRB):
if self.patch_to_temp(rp, diff_rorp, tf):
inc = self.inc_with_checking(tf, rp, self.get_incrp(index))
if inc is not None:
self.CCPP.set_inc(index, inc)
if inc.isreg():
inc.fsync_with_dir() # Write inc before rp changed
if tf.lstat():
......@@ -531,10 +548,12 @@ class IncrementITRB(PatchITRB):
inc = self.inc_with_checking(diff_rorp, base_rp,
self.get_incrp(index))
if inc and inc.isreg():
inc.fsync_with_dir() # must writte inc before rp changed
inc.fsync_with_dir() # must write inc before rp changed
self.prepare_dir(diff_rorp, base_rp)
elif (self.set_dir_replacement(diff_rorp, base_rp) and
self.inc_with_checking(self.dir_replacement, base_rp,
self.get_incrp(index))):
self.CCPP.flag_success(index)
elif self.set_dir_replacement(diff_rorp, base_rp):
inc = self.inc_with_checking(self.dir_replacement, base_rp,
self.get_incrp(index))
if inc:
self.CCPP.set_inc(index, inc)
self.CCPP.flag_success(index)
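To make the bookkeeping above easier to follow, here is a small illustrative model of the cache entry lifecycle: each index maps to [source_rorp, dest_rorp, changed_flag, success_flag, increment], the new increment slot is filled by set_inc() when an increment file is written, and all five fields are read back when the entry leaves the cache. Everything below (class name, the plain strings standing in for rorps and RPaths) is hypothetical.

# Hypothetical, simplified model of the CacheCollatedPostProcess cache;
# real entries hold rorp objects and RPaths, not strings.
class CacheModel(object):
    def __init__(self):
        self.cache = {}   # index -> [source, dest, changed, success, inc]
        self.order = []   # indices in arrival order

    def add(self, index, source, dest):
        self.cache[index] = [source, dest, 0, 0, None]
        self.order.append(index)

    def flag_changed(self, index):
        self.cache[index][2] = 1

    def set_inc(self, index, inc):
        self.cache[index][4] = inc

    def flag_success(self, index):
        self.cache[index][3] = 1

    def shorten(self):
        index = self.order.pop(0)
        source, dest, changed, success, inc = self.cache.pop(index)
        # Here the real code writes metadata and, if enabled, calls
        # statistics.FileStats.update(source, dest, changed, inc).
        return source, dest, changed, success, inc

if __name__ == "__main__":
    model = CacheModel()
    model.add(("dir", "file"), "source_rorp", "dest_rorp")
    model.flag_changed(("dir", "file"))
    model.set_inc(("dir", "file"), "increments/dir/file.2003-04-01.diff.gz")
    model.flag_success(("dir", "file"))
    print(model.shorten())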
......@@ -104,26 +104,20 @@ def RORP2Record(rorpath):
str_list.append(" Permissions %s\n" % rorpath.getperms())
return "".join(str_list)
line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$")
line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$", re.M)
def Record2RORP(record_string):
"""Given record_string, return RORPath
For speed reasons, write the RORPath data dictionary directly
instead of calling rorpath functions. This depends on the
instead of calling rorpath functions. Profiling has shown this to
be a time critical function.
"""
data_dict = {}
index_list = [None] # put in list so we can modify using parse_line
def process_line(line):
"""Process given line, and modify data_dict or index_list"""
if not line: return # skip empty lines
match = line_parsing_regexp.search(line)
if not match: raise ParsingError("Bad line: '%s'" % line)
field, data = match.group(1), match.group(2)
for field, data in line_parsing_regexp.findall(record_string):
if field == "File":
if data == ".": index_list[0] = ()
else: index_list[0] = tuple(unquote_path(data).split("/"))
if data == ".": index = ()
else: index = tuple(unquote_path(data).split("/"))
elif field == "Type":
if data == "None": data_dict['type'] = None
else: data_dict['type'] = data
......@@ -140,9 +134,7 @@ def Record2RORP(record_string):
elif field == "Gid": data_dict['gid'] = int(data)
elif field == "Permissions": data_dict['perms'] = int(data)
else: raise ParsingError("Unknown field in line '%s'" % line)
map(process_line, record_string.split("\n"))
return rpath.RORPath(index_list[0], data_dict)
return rpath.RORPath(index, data_dict)
chars_to_quote = re.compile("\\n|\\\\")
def quote_path(path_string):
......@@ -260,6 +252,7 @@ class rorp_extractor:
metadata_rp = None
metadata_fileobj = None
metadata_record_buffer = [] # Use this because gzip writes are slow
def OpenMetadata(rp = None, compress = 1):
"""Open the Metadata file for writing, return metadata fileobj"""
global metadata_rp, metadata_fileobj
......@@ -274,13 +267,20 @@ def OpenMetadata(rp = None, compress = 1):
def WriteMetadata(rorp):
"""Write metadata of rorp to file"""
global metadata_fileobj
metadata_fileobj.write(RORP2Record(rorp))
global metadata_fileobj, metadata_record_buffer
metadata_record_buffer.append(RORP2Record(rorp))
if len(metadata_record_buffer) >= 100: write_metadata_buffer()
def write_metadata_buffer():
global metadata_record_buffer
metadata_fileobj.write("".join(metadata_record_buffer))
metadata_record_buffer = []
def CloseMetadata():
"""Close the metadata file"""
global metadata_rp, metadata_fileobj
assert metadata_fileobj, "Metadata file not open"
if metadata_record_buffer: write_metadata_buffer()
try: fileno = metadata_fileobj.fileno() # will not work if GzipFile
except AttributeError: fileno = metadata_fileobj.fileobj.fileno()
os.fsync(fileno)
......
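The rewrite above replaces the per-line process_line() helper with a single findall() over the whole record, which is why the regexp now carries re.M (so ^ and $ match at every line boundary). A short illustration of that parsing step, using the same regexp on a hypothetical record, is:

import re

line_parsing_regexp = re.compile("^ *([A-Za-z0-9]+) (.+)$", re.M)

# Hypothetical record string in the metadata file's line-oriented format.
sample_record = ("File some/dir/file.txt\n"
                 "Type reg\n"
                 "Uid 1000\n"
                 "Gid 1000\n"
                 "Permissions 420\n")

for field, data in line_parsing_regexp.findall(sample_record):
    print(field, "->", data)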
......@@ -440,8 +440,8 @@ class RORPath:
def getnumlinks(self):
"""Number of places inode is linked to"""
try: return self.data['nlink']
except KeyError: return 1
if self.data.has_key('nlink'): return self.data['nlink']
else: return 1
def readlink(self):
"""Wrapper around os.readlink()"""
......
......@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information"""
import re, os, time
import Globals, robust, Time, rorpiter, increment, log
import Globals, Time, increment, log, static
class StatsException(Exception): pass
......@@ -347,6 +347,69 @@ def print_active_stats():
"""Print statistics of active statobj to stdout and log"""
global _active_statfileobj
assert _active_statfileobj
_active_statfileobj.finish()
statmsg = _active_statfileobj.get_stats_logstring("Session statistics")
log.Log.log_to_file(statmsg)
Globals.client_conn.sys.stdout.write(statmsg)
class FileStats:
"""Keep track of less detailed stats on file-by-file basis"""
_fileobj, _rp = None, None
_line_sep = None
def init(cls):
"""Open file stats object and prepare to write"""
assert not (cls._fileobj or cls._rp), (cls._fileobj, cls._rp)
rpbase = Globals.rbdir.append("file_statistics")
suffix = Globals.compression and 'data.gz' or 'data'
cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
assert not cls._rp.lstat()
cls._fileobj = cls._rp.open("wb", compress = Globals.compression)
cls._line_sep = Globals.null_separator and '\0' or '\n'
cls.write_docstring()
cls.line_buffer = []
def write_docstring(cls):
"""Write the first line (a documentation string) into file"""
cls._fileobj.write("# Format of each line in file statistics file:")
cls._fileobj.write(cls._line_sep)
cls._fileobj.write("# Filename Changed SourceSize MirrorSize "
"IncrementSize" + cls._line_sep)
def update(cls, source_rorp, dest_rorp, changed, inc):
"""Update file stats with given information"""
if source_rorp: filename = source_rorp.get_indexpath()
else: filename = dest_rorp.get_indexpath()
size_list = map(cls.get_size, [source_rorp, dest_rorp, inc])
line = " ".join([filename, str(changed)] + size_list)
cls.line_buffer.append(line)
if len(cls.line_buffer) >= 100: cls.write_buffer()
def get_size(cls, rorp):
"""Return the size of rorp as string, or "NA" if not a regular file"""
if not rorp: return "NA"
if rorp.isreg(): return str(rorp.getsize())
else: return "0"
def write_buffer(cls):
"""Write buffer to file because buffer is full
The buffer part is necessary because the GzipFile.write()
method seems fairly slow.
"""
assert cls.line_buffer and cls._fileobj
cls.line_buffer.append('') # have join add _line_sep to end also
cls._fileobj.write(cls._line_sep.join(cls.line_buffer))
cls.line_buffer = []
def close(cls):
"""Close file stats file"""
assert cls._fileobj, cls._fileobj
if cls.line_buffer: cls.write_buffer()
assert not cls._fileobj.close()
cls._fileobj = cls._rp = None
static.MakeClass(FileStats)
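FileStats above writes one space-separated line per file ("# Filename Changed SourceSize MirrorSize IncrementSize") and flushes its line buffer in batches of 100 because many small GzipFile.write() calls are relatively slow. A self-contained sketch of that buffered-writer pattern, with a hypothetical class name, file name, and sizes, could look like the following:

import gzip

class BufferedStatsWriter(object):
    """Hypothetical stand-in for FileStats: batch lines before gzip writes."""

    def __init__(self, path, flush_every=100):
        self.fileobj = gzip.open(path, "wb")
        self.buffer = []
        self.flush_every = flush_every
        self.fileobj.write(
            b"# Filename Changed SourceSize MirrorSize IncrementSize\n")

    def update(self, filename, changed, source_size, mirror_size, inc_size):
        # One line per file, mirroring the format documented above.
        line = " ".join([filename, str(changed), str(source_size),
                         str(mirror_size), str(inc_size)])
        self.buffer.append(line.encode("ascii"))
        if len(self.buffer) >= self.flush_every:
            self.flush()

    def flush(self):
        # Batch the buffered lines into a single write call.
        if self.buffer:
            self.fileobj.write(b"\n".join(self.buffer) + b"\n")
            self.buffer = []

    def close(self):
        self.flush()
        self.fileobj.close()

if __name__ == "__main__":
    stats = BufferedStatsWriter("file_statistics.example.data.gz")
    stats.update("some/dir/file.txt", 1, 4096, 4096, 120)
    stats.close()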
......@@ -8,7 +8,7 @@ class MetadataTest(unittest.TestCase):
def make_temp(self):
"""Make temp directory testfiles/output"""
global tempdir
tempdir.delete()
if tempdir.lstat(): tempdir.delete()
tempdir.mkdir()
def testQuote(self):
......
......@@ -180,26 +180,6 @@ class IncStatTest(unittest.TestCase):
rbdir = rpath.RPath(Globals.local_connection,
"testfiles/output/rdiff-backup-data")
#incs = Restore.get_inclist(rbdir.append("subdir").
# append("directory_statistics"))
#assert len(incs) == 2
#s1 = StatsObj().read_stats_from_rp(incs[0]) # initial mirror stats
#assert s1.SourceFiles == 2
#assert 400000 < s1.SourceFileSize < 420000
#self.stats_check_initial(s1)
#subdir_stats = StatsObj().read_stats_from_rp(incs[1]) # increment stats
#assert subdir_stats.SourceFiles == 2
#assert 400000 < subdir_stats.SourceFileSize < 420000
#assert subdir_stats.MirrorFiles == 2
#assert 400000 < subdir_stats.MirrorFileSize < 420000
#assert subdir_stats.NewFiles == subdir_stats.NewFileSize == 0
#assert subdir_stats.DeletedFiles == subdir_stats.DeletedFileSize == 0
#assert subdir_stats.ChangedFiles == 2
#assert 400000 < subdir_stats.ChangedSourceSize < 420000
#assert 400000 < subdir_stats.ChangedMirrorSize < 420000
#assert 10 < subdir_stats.IncrementFileSize < 20000
incs = restore.get_inclist(rbdir.append("session_statistics"))
assert len(incs) == 2
s2 = statistics.StatsObj().read_stats_from_rp(incs[0])
......@@ -214,7 +194,7 @@ class IncStatTest(unittest.TestCase):
assert 700000 <= root_stats.MirrorFileSize < 750000
assert root_stats.NewFiles == 1
assert root_stats.NewFileSize == 0
assert root_stats.DeletedFiles == 1
assert root_stats.DeletedFiles == 1, root_stats.DeletedFiles
assert root_stats.DeletedFileSize == 200000
assert 3 <= root_stats.ChangedFiles <= 4, root_stats.ChangedFiles
assert 450000 <= root_stats.ChangedSourceSize < 470000
......
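For reference, the short script below runs the metadata test suite under the Python profiler, saves the raw profiling data to "profile-output", and prints the 40 functions with the most internal run time.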
import profile, pstats
from metadatatest import *
profile.run("unittest.main()", "profile-output")
p = pstats.Stats("profile-output")
p.sort_stats('time')
p.print_stats(40)