Commit 4f0c89c5 authored by ben

Added new Select code


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@43 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent b737962f
......@@ -187,6 +187,7 @@ class DestructiveStepping:
destination file.
"""
return None # this code is for the test suites only, use Select instead
if Globals.exclude_device_files and dsrp.isdev(): return 1
if source: exclude_regexps = Globals.exclude_regexps
......
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.7.2 released April 30, 2002
# Version 0.7.2 released April 11, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
......
......@@ -37,7 +37,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint)
......@@ -53,7 +53,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
if not session_info: dest_rpath.conn.SaveState.touch_last_file()
src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
......@@ -85,16 +85,15 @@ class HLSourceStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls, rpath):
def iterate_from(cls):
"""Supply more aruments to DestructiveStepping.Iterate_from"""
if cls._session_info:
return DestructiveStepping.Iterate_from(rpath, 1,
cls._session_info.last_index)
else: return DestructiveStepping.Iterate_from(rpath, 1)
if cls._session_info is None: Globals.select_source.set_iter()
else: Globals.select_source.set_iter(cls._session_info.last_index)
return Globals.select_source
def split_initial_dsiter(cls, rpath):
def split_initial_dsiter(cls):
"""Set iterators of all dsrps from rpath, returning one"""
dsiter = cls.iterate_from(rpath)
dsiter = cls.iterate_from()
initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
return initial_dsiter1
......@@ -131,17 +130,15 @@ class HLDestinationStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls, rpath):
def iterate_from(cls):
"""Supply more arguments to DestructiveStepping.Iterate_from"""
if cls._session_info:
return DestructiveStepping.Iterate_from(rpath, None,
cls._session_info.last_index)
else: return DestructiveStepping.Iterate_from(rpath, None)
if cls._session_info is None: Globals.select_mirror.set_iter()
else: Globals.select_mirror.set_iter(cls._session_info.last_index)
return Globals.select_mirror
def split_initial_dsiter(cls, rpath):
def split_initial_dsiter(cls):
"""Set initial_dsiters (iteration of all dsrps from rpath)"""
dsiter = cls.iterate_from(rpath)
result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2)
return result
def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter):
......@@ -197,7 +194,7 @@ class HLDestinationStruct:
def get_sigs(cls, baserp, src_init_iter):
"""Return signatures of all dissimilar files"""
dest_iters1 = cls.split_initial_dsiter(baserp)
dest_iters1 = cls.split_initial_dsiter()
dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1)
return RORPIter.Signatures(dissimilars)
......
......@@ -404,6 +404,9 @@ class RPath(RORPath):
"bar") for "foo/bar" (no base), and ("local", "bin") for
"/usr/local/bin" if the base is "/usr".
For the root directory "/", the index is empty and the base is
"/".
"""
self.conn = connection
self.index = index
......@@ -487,9 +490,6 @@ class RPath(RORPath):
def _getdevnums(self):
"""Return tuple for special file (major, minor)"""
if Globals.exclude_device_files:
# No point in finding numbers because it will be excluded anyway
return ()
s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path)
return (s >> 8, s & 0xff)
......@@ -596,10 +596,9 @@ class RPath(RORPath):
def helper(dsrp, base_init_output, branch_reduction):
if dsrp.isdir(): dsrp.rmdir()
else: dsrp.delete()
dsiter = DestructiveStepping.Iterate_from(self, None)
itm = IterTreeReducer(lambda x: None, lambda x,y: None, None,
helper)
for dsrp in dsiter: itm(dsrp)
for dsrp in Select(self, None).set_iter(): itm(dsrp)
itm.getresult()
else: self.conn.os.unlink(self.path)
self.setdata()
......
......@@ -10,10 +10,18 @@ import re
# documentation on what this code does can be found on the man page.
#
class FilePrefixError(Exception):
class SelectError(Exception):
"""Some error dealing with the Select class"""
pass
class FilePrefixError(SelectError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
class GlobbingError(SelectError):
"""Something has gone wrong when parsing a glob string"""
pass
class Select:
"""Iterate appropriate DSRPaths in given directory
......@@ -50,60 +58,97 @@ class Select:
"""
# This re should not match normal filenames, but usually just globs
glob_re = re.compile(".*[\*\?\[]")
glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
def __init__(self, rpath, source):
"""DSRPIterator initializer.
def __init__(self, dsrpath):
"""DSRPIterator initializer"""
rpath is the root dir. Source is true if rpath is the root of
the source directory, and false for the mirror directory
"""
assert isinstance(rpath, RPath)
self.selection_functions = []
self.dsrpath = dsrpath
self.prefix = dsrpath.path
self.source = source
if isinstance(rpath, DSRPath): self.dsrpath = rpath
else: self.dsrpath = DSRPath(rpath.conn, rpath.base,
rpath.index, rpath.data)
self.prefix = self.dsrpath.path
def set_iter(self, starting_index = None):
"""Initialize more variables. dsrpath should be the root dir"""
def set_iter(self, starting_index = None, sel_func = None):
"""Initialize more variables, get ready to iterate
Will iterate indicies greater than starting_index. Selection
function sel_func is called on each dsrp and is usually
self.Select. Returns self just for convenience.
"""
if not sel_func: sel_func = self.Select
self.dsrpath.setdata() # this may have changed since Select init
if starting_index is not None:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
starting_index, self.iterate_starting_from)
else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
self.iterate_starting_from, sel_func)
else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
self.next = self.iter.next
self.__iter__ = lambda: self
return self
def Iterate(self, dsrpath, rec_func):
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
rec_func is usually the same as this function and is what
Iterate uses to find files in subdirectories. It is used in
iterate_starting_from.
sel_func is the selection function to use on the dsrps. It is
usually self.Select.
"""
s = self.Select(dsrpath)
s = sel_func(dsrpath)
if not s or DestructiveStepping.initialize(dsrpath, self.source):
return
if s == 1: # File is included
yield dsrpath
if dsrpath.isdir():
for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
yield dsrp
elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
iid = self.iterate_in_dir(dsrpath, rec_func)
iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
try: first = iid.next()
except StopIteration: return # no files inside; skip dsrp
yield dsrpath
yield first
for dsrp in iid: yield dsrp
def iterate_in_dir(self, dsrpath, rec_func):
def iterate_in_dir(self, dsrpath, rec_func, sel_func):
"""Iterate the dsrps in directory dsrpath."""
dir_listing = dsrpath.listdir()
dir_listing.sort()
for filename in dir_listing:
for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func):
yield dsrp
def iterate_starting_from(self, dsrpath):
def iterate_starting_from(self, dsrpath, rec_func, sel_func):
"""Like Iterate, but only yield indicies > self.starting_index"""
if DestructiveStepping.initialize(dsrpath, self.source): return
if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
yield dsrp
elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch
for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
yield dsrp
for dsrp in self.iterate_in_dir(dsrpath,
self.iterate_starting_from,
sel_func): yield dsrp
def iterate_with_finalizer(self):
    """Yield every dsrp from self, finalizing each one afterwards

    A DestructiveStepping.Finalizer is created up front.  Each dsrp
    is passed to it when the generator is resumed after yielding
    that dsrp, and getresult() is called on the finalizer once the
    underlying iteration is exhausted.

    """
    finalizer = DestructiveStepping.Finalizer()
    for dsrp in self:
        yield dsrp
        finalizer(dsrp)
    finalizer.getresult()
def Select(self, dsrp):
"""Run through the selection functions and return dominant value"""
for sf in self.selection_functions:
......@@ -123,29 +168,68 @@ class Select:
information is sent over the link.
"""
for opt, arg in argtuples:
if opt == "--exclude":
self.add_selection_func(self.glob_get_sf(arg, 0))
elif opt == "--exclude-device-files":
self.add_selection_func(self.devfiles_get_sf())
elif opt == "--exclude-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
0, arg[0]))
elif opt == "--exclude-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 0))
elif opt == "--include":
self.add_selection_func(self.glob_get_sf(arg, 1))
elif opt == "--include-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
1, arg[0]))
elif opt == "--include-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 1))
else: assert 0, "Bad option %s" % opt
# Exclude rdiff-backup-data directory
try:
for opt, arg in argtuples:
if opt == "--exclude":
self.add_selection_func(self.glob_get_sf(arg, 0))
elif opt == "--exclude-device-files":
self.add_selection_func(self.devfiles_get_sf())
elif opt == "--exclude-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
0, arg[0]))
elif opt == "--exclude-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 0))
elif opt == "--include":
self.add_selection_func(self.glob_get_sf(arg, 1))
elif opt == "--include-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
1, arg[0]))
elif opt == "--include-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 1))
else: assert 0, "Bad option %s" % opt
except SelectError, e: self.parse_catch_error(e)
self.parse_last_excludes()
self.parse_rbdir_exclude()
self.parse_proc_exclude()
def parse_catch_error(self, exc):
    """Report a SelectError raised while parsing selection options

    exc is the exception instance caught by the caller.  A
    FilePrefixError or a GlobbingError is reported through
    Log.FatalError; any other exception is re-raised.  Because the
    last branch uses a bare raise, this must be called from inside
    the except clause that caught exc.

    """
    if isinstance(exc, FilePrefixError):
        Log.FatalError(
"""Fatal Error: The file specification
%s
cannot match any files in the base directory
%s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""" %
            (exc, self.prefix))
    elif isinstance(exc, GlobbingError):
        # The original tested the undefined name "e" here, which
        # raised NameError instead of reporting the globbing problem.
        Log.FatalError("Fatal Error while processing expression\n"
                       "%s" % exc)
    else: raise
def parse_rbdir_exclude(self):
    """Put an exclude for the rdiff-backup-data dir first in the list"""
    rbdir_sf = self.glob_get_tuple_sf(("rdiff-backup-data",), 0)
    # Second argument true: insert at the start instead of appending
    self.add_selection_func(rbdir_sf, 1)
def parse_proc_exclude(self):
    """Add an exclude for "proc" at the front when the prefix is "/" """
    if self.prefix != "/": return
    proc_sf = self.glob_get_tuple_sf(("proc",), 0)
    # Second argument true: insert at the start instead of appending
    self.add_selection_func(proc_sf, 1)
def parse_last_excludes(self):
    """Report a fatal error if the last selection function is an include

    Does nothing when there are no selection functions or when the
    last one has a true "exclude" attribute.  Otherwise the "name"
    attribute of the last selection function is reported through
    Log.FatalError.

    """
    # The original read self.select_functions, an attribute that is
    # never set; the list is stored as self.selection_functions.
    if self.selection_functions and not self.selection_functions[-1].exclude:
        # Exactly one %s in the message, so exactly one argument; the
        # original also passed self.prefix, which made the formatting
        # itself raise TypeError.
        Log.FatalError(
"""Last selection expression:
%s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""" %
            (self.selection_functions[-1].name,))
def add_selection_func(self, sel_func, add_to_start = None):
"""Add another selection function at the end or beginning"""
if add_to_start: self.selection_functions.insert(0, sel_func)
......@@ -259,8 +343,7 @@ class Select:
raise
def sel_func(dsrp):
match = regexp.match(dsrp.path)
if match and match.end(0) == len(dsrp.path): return include
if regexp.search(dsrp.path): return include
else: return None
sel_func.exclude = not include
......@@ -284,8 +367,8 @@ class Select:
assert include == 0 or include == 1
if glob_str == "**": sel_func = lambda dsrp: include
elif not self.glob_re.match(glob_str): # normal file
return self.glob_get_filename_sf(glob_str, include)
else: pass ####XXXXXXXXXXXXX
sel_func = self.glob_get_filename_sf(glob_str, include)
else: sel_func = self.glob_get_normal_sf(glob_str, include)
sel_func.exclude = not include
sel_func.name = "Command-line glob: %s" % glob_str
......@@ -296,20 +379,18 @@ class Select:
Some of the parsing is better explained in
filelist_parse_line. The reason this is split from normal
globbing is so we can check the prefix and give proper
warning.
globbing is things are a lot less complicated if no special
globbing characters are used.
"""
if not filename.startswith(self.prefix):
Log("Warning: file specification %s does not start with\n"
"prefix %s, ignoring" % (filename, self.prefix), 2)
return lambda x: None # dummy selection function
raise FilePrefixError(filename)
index = tuple(filter(lambda x: x,
filename[len(self.prefix):].split("/")))
return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
"""Add selection function based on tuple"""
"""Return selection function based on tuple"""
def include_sel_func(dsrp):
if (dsrp.index == tuple[:len(dsrp.index)] or
dsrp.index[:len(tuple)] == tuple):
......@@ -327,3 +408,94 @@ class Select:
sel_func.name = "Tuple select %s" % (tuple,)
return sel_func
def glob_get_normal_sf(self, glob_str, include):
    """Build a selection function from a glob containing wildcards

    glob_str is converted to a regular expression and matched
    against dsrp.path.  An include function must also return 2
    (scan) for directories that might contain a match, so the glob
    is split into its "/"-separated prefixes and any path matching
    one of those prefixes is scanned.  A leading "ignorecase:" (in
    any case) is stripped and makes both expressions
    case-insensitive.

    Returns the include function if include is true, otherwise the
    exclude function.  Raises FilePrefixError if the include
    function returns a false value for self.dsrpath.

    Thanks to Donovan Baarda who provided some code which did some
    things similar to this.

    """
    ignorecase_tag = "ignorecase:"
    flags = re.S
    if glob_str.lower().startswith(ignorecase_tag):
        flags = flags | re.I
        glob_str = glob_str[len(ignorecase_tag):]

    # Matches what the glob matches, plus anything beneath that path
    full_re = re.compile("^%s($|/)" % self.glob_to_re(glob_str), flags)

    # Only the part up to and including the first ** is used for the
    # scan prefixes
    star_pos = glob_str.find("**")
    if star_pos != -1:
        glob_str = glob_str[:star_pos + 2]
    scan_re = re.compile(
        "^(%s)$" % "|".join(self.glob_get_prefix_res(glob_str)), flags)

    def include_sel_func(dsrp):
        if full_re.match(dsrp.path): return 1
        if scan_re.match(dsrp.path): return 2
        return None

    def exclude_sel_func(dsrp):
        if full_re.match(dsrp.path): return 0
        return None

    # Check to make sure prefix is ok
    if not include_sel_func(self.dsrpath):
        raise FilePrefixError(glob_str)
    if include: return include_sel_func
    return exclude_sel_func
def glob_get_prefix_res(self, glob_str):
    """Return list of regexps equivalent to prefixes of glob_str

    glob_str is split on "/" and every leading run of components,
    shortest first, is converted with self.glob_to_re.  An empty
    first prefix (glob_str starts with "/") is replaced by "/".
    Raises GlobbingError if an empty component appears anywhere but
    first or last, i.e. on consecutive slashes.

    """
    glob_parts = glob_str.split("/")
    if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
        raise GlobbingError("Consecutive '/'s found in globbing string "
                            + glob_str)

    # Real lists are built here because prefixes[0] is assigned below
    # and callers get a list back, whatever map() returns.
    prefixes = ["/".join(glob_parts[:i+1]) for i in range(len(glob_parts))]
    # we must make exception for root "/", only dir to end in slash
    if prefixes[0] == "": prefixes[0] = "/"
    return [self.glob_to_re(prefix) for prefix in prefixes]
def glob_to_re(self, pat):
    """Return regular expression equivalent to shell glob pat

    Only the ?, *, [] and ** expressions are treated specially:
    ** becomes ".*", * becomes "[^/]*" and ? becomes "[^/]".  The
    text between [ and ] is copied into a regex character class with
    backslashes doubled and a leading ! or ^ written as ^, so a range
    such as [a-z] is handed to the regex engine as is rather than
    interpreted here.  A [ with no closing ] is taken literally.
    Every other character goes through re.escape.  There is no way
    to quote the special characters.

    This function taken with minor modifications from efnmatch.py
    by Donovan Baarda.

    """
    # Pieces are collected in a list and joined once at the end
    # instead of growing a string on every iteration.
    i, n, parts = 0, len(pat), []
    while i < n:
        c, s = pat[i], pat[i:i+2]
        i = i+1
        if s == '**':
            parts.append('.*')
            i = i + 1 # skip the second star as well
        elif c == '*': parts.append('[^/]*')
        elif c == '?': parts.append('[^/]')
        elif c == '[':
            # Find the closing bracket; a ] directly after [ or after
            # the negation character belongs to the class
            j = i
            if j < n and pat[j] in '!^': j = j+1
            if j < n and pat[j] == ']': j = j+1
            while j < n and pat[j] != ']': j = j+1
            if j >= n: parts.append('\\[') # interpret the [ literally
            else: # Deal with inside of [..]
                stuff = pat[i:j].replace('\\','\\\\')
                i = j+1
                if stuff[0] in '!^': stuff = '^' + stuff[1:]
                parts.append('[' + stuff + ']')
        else: parts.append(re.escape(c))
    return ''.join(parts)
......@@ -187,6 +187,7 @@ class DestructiveStepping:
destination file.
"""
return None # this code is for the test suites only, use Select instead
if Globals.exclude_device_files and dsrp.isdev(): return 1
if source: exclude_regexps = Globals.exclude_regexps
......
......@@ -123,8 +123,8 @@ class Globals:
# case-insensitive regular expression won't be compressed (applies
# to .snapshots and .diffs). The second below will be the
# compiled version of the first.
no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
"jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp_string = "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
"jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp = None
# On the reader and writer connections, the following will be
......@@ -183,9 +183,9 @@ class Globals:
def set_select(cls, source, dsrpath, tuplelist):
"""Initialize select object using tuplelist"""
if source:
cls.select_source = Select(dsrpath)
cls.select_source = Select(dsrpath, 1)
cls.select_source.ParseArgs(tuplelist)
else:
cls.select_mirror = Select(dsrpath)
cls.select_mirror = Select(dsrpath, None)
cls.select_mirror.ParseArgs(tuplelist)
set_select = classmethod(set_select)
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.7.2 released April 30, 2002
# Version 0.7.2 released April 11, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
......
......@@ -37,7 +37,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint)
......@@ -53,7 +53,7 @@ class HighLevel:
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
if not session_info: dest_rpath.conn.SaveState.touch_last_file()
src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
......@@ -85,16 +85,15 @@ class HLSourceStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls, rpath):
def iterate_from(cls):
"""Supply more aruments to DestructiveStepping.Iterate_from"""
if cls._session_info:
return DestructiveStepping.Iterate_from(rpath, 1,
cls._session_info.last_index)
else: return DestructiveStepping.Iterate_from(rpath, 1)
if cls._session_info is None: Globals.select_source.set_iter()
else: Globals.select_source.set_iter(cls._session_info.last_index)
return Globals.select_source
def split_initial_dsiter(cls, rpath):
def split_initial_dsiter(cls):
"""Set iterators of all dsrps from rpath, returning one"""
dsiter = cls.iterate_from(rpath)
dsiter = cls.iterate_from()
initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
return initial_dsiter1
......@@ -131,17 +130,15 @@ class HLDestinationStruct:
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls, rpath):
def iterate_from(cls):
"""Supply more arguments to DestructiveStepping.Iterate_from"""
if cls._session_info:
return DestructiveStepping.Iterate_from(rpath, None,
cls._session_info.last_index)
else: return DestructiveStepping.Iterate_from(rpath, None)
if cls._session_info is None: Globals.select_mirror.set_iter()
else: Globals.select_mirror.set_iter(cls._session_info.last_index)
return Globals.select_mirror
def split_initial_dsiter(cls, rpath):
def split_initial_dsiter(cls):
"""Set initial_dsiters (iteration of all dsrps from rpath)"""
dsiter = cls.iterate_from(rpath)
result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2)
return result
def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter):
......@@ -197,7 +194,7 @@ class HLDestinationStruct:
def get_sigs(cls, baserp, src_init_iter):
"""Return signatures of all dissimilar files"""
dest_iters1 = cls.split_initial_dsiter(baserp)
dest_iters1 = cls.split_initial_dsiter()
dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1)
return RORPIter.Signatures(dissimilars)
......
......@@ -125,7 +125,7 @@ class Main:
sys.exit(1)
def misc_setup(self, rps):
"""Set default change ownership flag, umask, regular expressions"""
"""Set default change ownership flag, umask, Select objects"""
if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or
(len(rps) < 2 and os.getuid() == 0)):
# Allow change_ownership if destination connection is root
......@@ -139,7 +139,7 @@ class Main:
rps[1].conn.Globals.set_select(None, rps[1],
self.select_mirror_opts)
Globals.postset_regexp('no_compression_regexp',
Globals.no_compression_regexp_string, re.I)
Globals.no_compression_regexp_string)
def take_action(self, rps):
"""Do whatever self.action says"""
......@@ -248,8 +248,8 @@ rdiff-backup with the --force option.""" % rpout.path)
(rpin.path == "." and rpout.path[0] != '/' and
rpout.path[:2] != '..')):
# Just a few heuristics, we don't have to get every case
if not DestructiveStepping.isexcluded(rpout, 1):
Log(
if Globals.backup_reader.Globals.select_source \
.Select(rpout): Log(
"""Warning: The destination directory '%s' may be contained in the
source directory '%s'. This could cause an infinite regress. You
may need to use the --exclude option.""" % (rpout.path, rpin.path), 2)
......
......@@ -404,6 +404,9 @@ class RPath(RORPath):
"bar") for "foo/bar" (no base), and ("local", "bin") for
"/usr/local/bin" if the base is "/usr".
For the root directory "/", the index is empty and the base is
"/".
"""
self.conn = connection
self.index = index
......@@ -487,9 +490,6 @@ class RPath(RORPath):
def _getdevnums(self):
"""Return tuple for special file (major, minor)"""
if Globals.exclude_device_files:
# No point in finding numbers because it will be excluded anyway
return ()
s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path)
return (s >> 8, s & 0xff)
......@@ -596,10 +596,9 @@ class RPath(RORPath):
def helper(dsrp, base_init_output, branch_reduction):
if dsrp.isdir(): dsrp.rmdir()
else: dsrp.delete()
dsiter = DestructiveStepping.Iterate_from(self, None)
itm = IterTreeReducer(lambda x: None, lambda x,y: None, None,
helper)
for dsrp in dsiter: itm(dsrp)
for dsrp in Select(self, None).set_iter(): itm(dsrp)
itm.getresult()
else: self.conn.os.unlink(self.path)
self.setdata()
......
......@@ -10,10 +10,18 @@ import re
# documentation on what this code does can be found on the man page.
#
class FilePrefixError(Exception):
class SelectError(Exception):
"""Some error dealing with the Select class"""
pass
class FilePrefixError(SelectError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
class GlobbingError(SelectError):
"""Something has gone wrong when parsing a glob string"""
pass
class Select:
"""Iterate appropriate DSRPaths in given directory
......@@ -50,60 +58,97 @@ class Select:
"""
# This re should not match normal filenames, but usually just globs
glob_re = re.compile(".*[\*\?\[]")
glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
def __init__(self, rpath, source):
"""DSRPIterator initializer.
def __init__(self, dsrpath):
"""DSRPIterator initializer"""
rpath is the root dir. Source is true if rpath is the root of
the source directory, and false for the mirror directory
"""
assert isinstance(rpath, RPath)
self.selection_functions = []
self.dsrpath = dsrpath
self.prefix = dsrpath.path
self.source = source
if isinstance(rpath, DSRPath): self.dsrpath = rpath
else: self.dsrpath = DSRPath(rpath.conn, rpath.base,
rpath.index, rpath.data)
self.prefix = self.dsrpath.path
def set_iter(self, starting_index = None):
"""Initialize more variables. dsrpath should be the root dir"""
def set_iter(self, starting_index = None, sel_func = None):
"""Initialize more variables, get ready to iterate
Will iterate indicies greater than starting_index. Selection
function sel_func is called on each dsrp and is usually
self.Select. Returns self just for convenience.
"""
if not sel_func: sel_func = self.Select
self.dsrpath.setdata() # this may have changed since Select init
if starting_index is not None:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
starting_index, self.iterate_starting_from)
else: self.iter = self.Iterate(self.dsrpath, self.Iterate)
self.iterate_starting_from, sel_func)
else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
self.next = self.iter.next
self.__iter__ = lambda: self
return self
def Iterate(self, dsrpath, rec_func):
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
rec_func is usually the same as this function and is what
Iterate uses to find files in subdirectories. It is used in
iterate_starting_from.
sel_func is the selection function to use on the dsrps. It is
usually self.Select.
"""
s = self.Select(dsrpath)
s = sel_func(dsrpath)
if not s or DestructiveStepping.initialize(dsrpath, self.source):
return
if s == 1: # File is included
yield dsrpath
if dsrpath.isdir():
for dsrp in self.iterate_in_dir(dsrpath, rec_func): yield dsrp
for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
yield dsrp
elif s == 2 and dsrpath.isdir(): # Directory is merely scanned
iid = self.iterate_in_dir(dsrpath, rec_func)
iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
try: first = iid.next()
except StopIteration: return # no files inside; skip dsrp
yield dsrpath
yield first
for dsrp in iid: yield dsrp
def iterate_in_dir(self, dsrpath, rec_func):
def iterate_in_dir(self, dsrpath, rec_func, sel_func):
"""Iterate the dsrps in directory dsrpath."""
dir_listing = dsrpath.listdir()
dir_listing.sort()
for filename in dir_listing:
for dsrp in rec_func(dsrpath.append(filename)): yield dsrp
for dsrp in rec_func(dsrpath.append(filename), rec_func, sel_func):
yield dsrp
def iterate_starting_from(self, dsrpath):
def iterate_starting_from(self, dsrpath, rec_func, sel_func):
"""Like Iterate, but only yield indicies > self.starting_index"""
if DestructiveStepping.initialize(dsrpath, self.source): return
if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
yield dsrp
elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch
for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
yield dsrp
for dsrp in self.iterate_in_dir(dsrpath,
self.iterate_starting_from,
sel_func): yield dsrp
def iterate_with_finalizer(self):
    """Yield every dsrp from self, finalizing each one afterwards

    A DestructiveStepping.Finalizer is created up front.  Each dsrp
    is passed to it when the generator is resumed after yielding
    that dsrp, and getresult() is called on the finalizer once the
    underlying iteration is exhausted.

    """
    finalizer = DestructiveStepping.Finalizer()
    for dsrp in self:
        yield dsrp
        finalizer(dsrp)
    finalizer.getresult()
def Select(self, dsrp):
"""Run through the selection functions and return dominant value"""
for sf in self.selection_functions:
......@@ -123,29 +168,68 @@ class Select:
information is sent over the link.
"""
for opt, arg in argtuples:
if opt == "--exclude":
self.add_selection_func(self.glob_get_sf(arg, 0))
elif opt == "--exclude-device-files":
self.add_selection_func(self.devfiles_get_sf())
elif opt == "--exclude-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
0, arg[0]))
elif opt == "--exclude-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 0))
elif opt == "--include":
self.add_selection_func(self.glob_get_sf(arg, 1))
elif opt == "--include-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
1, arg[0]))
elif opt == "--include-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 1))
else: assert 0, "Bad option %s" % opt
# Exclude rdiff-backup-data directory
try:
for opt, arg in argtuples:
if opt == "--exclude":
self.add_selection_func(self.glob_get_sf(arg, 0))
elif opt == "--exclude-device-files":
self.add_selection_func(self.devfiles_get_sf())
elif opt == "--exclude-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
0, arg[0]))
elif opt == "--exclude-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 0))
elif opt == "--include":
self.add_selection_func(self.glob_get_sf(arg, 1))
elif opt == "--include-filelist":
self.add_selection_func(self.filelist_get_sf(arg[1],
1, arg[0]))
elif opt == "--include-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 1))
else: assert 0, "Bad option %s" % opt
except SelectError, e: self.parse_catch_error(e)
self.parse_last_excludes()
self.parse_rbdir_exclude()
self.parse_proc_exclude()
def parse_catch_error(self, exc):
    """Report a SelectError raised while parsing selection options

    exc is the exception instance caught by the caller.  A
    FilePrefixError or a GlobbingError is reported through
    Log.FatalError; any other exception is re-raised.  Because the
    last branch uses a bare raise, this must be called from inside
    the except clause that caught exc.

    """
    if isinstance(exc, FilePrefixError):
        Log.FatalError(
"""Fatal Error: The file specification
%s
cannot match any files in the base directory
%s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""" %
            (exc, self.prefix))
    elif isinstance(exc, GlobbingError):
        # The original tested the undefined name "e" here, which
        # raised NameError instead of reporting the globbing problem.
        Log.FatalError("Fatal Error while processing expression\n"
                       "%s" % exc)
    else: raise
def parse_rbdir_exclude(self):
    """Put an exclude for the rdiff-backup-data dir first in the list"""
    rbdir_sf = self.glob_get_tuple_sf(("rdiff-backup-data",), 0)
    # Second argument true: insert at the start instead of appending
    self.add_selection_func(rbdir_sf, 1)
def parse_proc_exclude(self):
    """Add an exclude for "proc" at the front when the prefix is "/" """
    if self.prefix != "/": return
    proc_sf = self.glob_get_tuple_sf(("proc",), 0)
    # Second argument true: insert at the start instead of appending
    self.add_selection_func(proc_sf, 1)
def parse_last_excludes(self):
    """Report a fatal error if the last selection function is an include

    Does nothing when there are no selection functions or when the
    last one has a true "exclude" attribute.  Otherwise the "name"
    attribute of the last selection function is reported through
    Log.FatalError.

    """
    # The original read self.select_functions, an attribute that is
    # never set; the list is stored as self.selection_functions.
    if self.selection_functions and not self.selection_functions[-1].exclude:
        # Exactly one %s in the message, so exactly one argument; the
        # original also passed self.prefix, which made the formatting
        # itself raise TypeError.
        Log.FatalError(
"""Last selection expression:
%s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""" %
            (self.selection_functions[-1].name,))
def add_selection_func(self, sel_func, add_to_start = None):
"""Add another selection function at the end or beginning"""
if add_to_start: self.selection_functions.insert(0, sel_func)
......@@ -259,8 +343,7 @@ class Select:
raise
def sel_func(dsrp):
match = regexp.match(dsrp.path)
if match and match.end(0) == len(dsrp.path): return include
if regexp.search(dsrp.path): return include
else: return None
sel_func.exclude = not include
......@@ -284,8 +367,8 @@ class Select:
assert include == 0 or include == 1
if glob_str == "**": sel_func = lambda dsrp: include
elif not self.glob_re.match(glob_str): # normal file
return self.glob_get_filename_sf(glob_str, include)
else: pass ####XXXXXXXXXXXXX
sel_func = self.glob_get_filename_sf(glob_str, include)
else: sel_func = self.glob_get_normal_sf(glob_str, include)
sel_func.exclude = not include
sel_func.name = "Command-line glob: %s" % glob_str
......@@ -296,20 +379,18 @@ class Select:
Some of the parsing is better explained in
filelist_parse_line. The reason this is split from normal
globbing is so we can check the prefix and give proper
warning.
globbing is things are a lot less complicated if no special
globbing characters are used.
"""
if not filename.startswith(self.prefix):
Log("Warning: file specification %s does not start with\n"
"prefix %s, ignoring" % (filename, self.prefix), 2)
return lambda x: None # dummy selection function
raise FilePrefixError(filename)
index = tuple(filter(lambda x: x,
filename[len(self.prefix):].split("/")))
return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
"""Add selection function based on tuple"""
"""Return selection function based on tuple"""
def include_sel_func(dsrp):
if (dsrp.index == tuple[:len(dsrp.index)] or
dsrp.index[:len(tuple)] == tuple):
......@@ -327,3 +408,94 @@ class Select:
sel_func.name = "Tuple select %s" % (tuple,)
return sel_func
def glob_get_normal_sf(self, glob_str, include):
    """Build a selection function from a glob containing wildcards

    glob_str is converted to a regular expression and matched
    against dsrp.path.  An include function must also return 2
    (scan) for directories that might contain a match, so the glob
    is split into its "/"-separated prefixes and any path matching
    one of those prefixes is scanned.  A leading "ignorecase:" (in
    any case) is stripped and makes both expressions
    case-insensitive.

    Returns the include function if include is true, otherwise the
    exclude function.  Raises FilePrefixError if the include
    function returns a false value for self.dsrpath.

    Thanks to Donovan Baarda who provided some code which did some
    things similar to this.

    """
    ignorecase_tag = "ignorecase:"
    flags = re.S
    if glob_str.lower().startswith(ignorecase_tag):
        flags = flags | re.I
        glob_str = glob_str[len(ignorecase_tag):]

    # Matches what the glob matches, plus anything beneath that path
    full_re = re.compile("^%s($|/)" % self.glob_to_re(glob_str), flags)

    # Only the part up to and including the first ** is used for the
    # scan prefixes
    star_pos = glob_str.find("**")
    if star_pos != -1:
        glob_str = glob_str[:star_pos + 2]
    scan_re = re.compile(
        "^(%s)$" % "|".join(self.glob_get_prefix_res(glob_str)), flags)

    def include_sel_func(dsrp):
        if full_re.match(dsrp.path): return 1
        if scan_re.match(dsrp.path): return 2
        return None

    def exclude_sel_func(dsrp):
        if full_re.match(dsrp.path): return 0
        return None

    # Check to make sure prefix is ok
    if not include_sel_func(self.dsrpath):
        raise FilePrefixError(glob_str)
    if include: return include_sel_func
    return exclude_sel_func
def glob_get_prefix_res(self, glob_str):
    """Return list of regexps equivalent to prefixes of glob_str

    glob_str is split on "/" and every leading run of components,
    shortest first, is converted with self.glob_to_re.  An empty
    first prefix (glob_str starts with "/") is replaced by "/".
    Raises GlobbingError if an empty component appears anywhere but
    first or last, i.e. on consecutive slashes.

    """
    glob_parts = glob_str.split("/")
    if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
        raise GlobbingError("Consecutive '/'s found in globbing string "
                            + glob_str)

    # Real lists are built here because prefixes[0] is assigned below
    # and callers get a list back, whatever map() returns.
    prefixes = ["/".join(glob_parts[:i+1]) for i in range(len(glob_parts))]
    # we must make exception for root "/", only dir to end in slash
    if prefixes[0] == "": prefixes[0] = "/"
    return [self.glob_to_re(prefix) for prefix in prefixes]
def glob_to_re(self, pat):
    """Return regular expression equivalent to shell glob pat

    Only the ?, *, [] and ** expressions are treated specially:
    ** becomes ".*", * becomes "[^/]*" and ? becomes "[^/]".  The
    text between [ and ] is copied into a regex character class with
    backslashes doubled and a leading ! or ^ written as ^, so a range
    such as [a-z] is handed to the regex engine as is rather than
    interpreted here.  A [ with no closing ] is taken literally.
    Every other character goes through re.escape.  There is no way
    to quote the special characters.

    This function taken with minor modifications from efnmatch.py
    by Donovan Baarda.

    """
    # Pieces are collected in a list and joined once at the end
    # instead of growing a string on every iteration.
    i, n, parts = 0, len(pat), []
    while i < n:
        c, s = pat[i], pat[i:i+2]
        i = i+1
        if s == '**':
            parts.append('.*')
            i = i + 1 # skip the second star as well
        elif c == '*': parts.append('[^/]*')
        elif c == '?': parts.append('[^/]')
        elif c == '[':
            # Find the closing bracket; a ] directly after [ or after
            # the negation character belongs to the class
            j = i
            if j < n and pat[j] in '!^': j = j+1
            if j < n and pat[j] == ']': j = j+1
            while j < n and pat[j] != ']': j = j+1
            if j >= n: parts.append('\\[') # interpret the [ literally
            else: # Deal with inside of [..]
                stuff = pat[i:j].replace('\\','\\\\')
                i = j+1
                if stuff[0] in '!^': stuff = '^' + stuff[1:]
                parts.append('[' + stuff + ']')
        else: parts.append(re.escape(c))
    return ''.join(parts)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment