Commit 4848f4fb authored by ben's avatar ben

Removed all files from src/ directory, added to rdiff_backup/


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@237 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 72e15c94
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Coordinate corresponding files with different names
For instance, some source filenames may contain characters not allowed
on the mirror end. Also, if a source filename is very long (say 240
characters), the extra characters added to related increments may put
them over the usual 255 character limit.
"""
import re
from log import *
from robust import *
import Globals
max_filename_length = 255
# If true, enable character quoting, and set characters making
# regex-style range.
chars_to_quote = None
# These compiled regular expressions are used in quoting and unquoting
chars_to_quote_regexp = None
unquoting_regexp = None
# Use given char to quote. Default is set in Globals.
quoting_char = None
def set_init_quote_vals():
"""Set quoting value from Globals on all conns"""
for conn in Globals.connections:
conn.FilenameMapping.set_init_quote_vals_local()
def set_init_quote_vals_local():
"""Set value on local connection, initialize regexps"""
global chars_to_quote, quoting_char
chars_to_quote = Globals.chars_to_quote
if len(Globals.quoting_char) != 1:
Log.FatalError("Expected single character for quoting char,"
"got '%s' instead" % (Globals.quoting_char,))
quoting_char = Globals.quoting_char
init_quoting_regexps()
def init_quoting_regexps():
"""Compile quoting regular expressions"""
global chars_to_quote_regexp, unquoting_regexp
try:
chars_to_quote_regexp = \
re.compile("[%s%s]" % (chars_to_quote, quoting_char), re.S)
unquoting_regexp = re.compile("%s[0-9]{3}" % quoting_char, re.S)
except re.error:
Log.FatalError("Error '%s' when processing char quote list %s" %
(re.error, chars_to_quote))
def quote(path):
"""Return quoted version of given path
Any characters quoted will be replaced by the quoting char and
the ascii number of the character. For instance, "10:11:12"
would go to "10;05811;05812" if ":" were quoted and ";" were
the quoting character.
"""
return chars_to_quote_regexp.sub(quote_single, path)
def quote_single(match):
"""Return replacement for a single character"""
return "%s%03d" % (quoting_char, ord(match.group()))
def unquote(path):
"""Return original version of quoted filename"""
return unquoting_regexp.sub(unquote_single, path)
def unquote_single(match):
"""Unquote a single quoted character"""
assert len(match.group()) == 4
return chr(int(match.group()[1:]))
def get_quoted_dir_children(rpath):
"""For rpath directory, return list of quoted children in dir"""
if not rpath.isdir(): return []
dir_pairs = [(unquote(filename), filename)
for filename in Robust.listrp(rpath)]
dir_pairs.sort() # sort by real index, not quoted part
child_list = []
for unquoted, filename in dir_pairs:
childrp = rpath.append(unquoted)
childrp.quote_path()
child_list.append(childrp)
return child_list
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Hold a variety of constants usually set at initialization."""
import re, os
# The current version of rdiff-backup
version = "$version"
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
current_time = None
# This determines how many bytes to read at a time when copying
blocksize = 32768
# This is used by the BufferedRead class to determine how many
# bytes to request from the underlying file per read(). Larger
# values may save on connection overhead and latency.
conn_bufsize = 98304
# True if script is running as a server
server = None
# uid and gid of the owner of the rdiff-backup process. This can
# vary depending on the connection.
process_uid = os.getuid()
process_gid = os.getgid()
# If true, when copying attributes, also change target's uid/gid
change_ownership = None
# If true, change the permissions of unwriteable mirror files
# (such as directories) so that they can be written, and then
# change them back. This defaults to 1 just in case the process
# is not running as root (root doesn't need to change
# permissions).
change_mirror_perms = (process_uid != 0)
# If true, temporarily change permissions of unreadable files in
# the source directory to make sure we can read all files.
change_source_perms = None
# If true, try to reset the atimes of the source partition.
preserve_atime = None
# This will be set as soon as the LocalConnection class loads
local_connection = None
# All connections should be added to the following list, so
# further global changes can be propagated to the remote systems.
# The first element should be Globals.local_connection. For a
# server, the second is the connection to the client.
connections = []
# Each process should have a connection number unique to the
# session. The client has connection number 0.
connection_number = 0
# Dictionary pairing connection numbers with connections. Set in
# SetConnections for all connections.
connection_dict = {}
# True if the script is the end that reads the source directory
# for backups. It is true for purely local sessions.
isbackup_reader = None
# Connection of the real backup reader (for which isbackup_reader
# is true)
backup_reader = None
# True if the script is the end that writes to the increment and
# mirror directories. True for purely local sessions.
isbackup_writer = None
# Connection of the backup writer
backup_writer = None
# Connection of the client
client_conn = None
# This list is used by the set function below. When a new
# connection is created with init_connection, its Globals class
# will match this one for all the variables mentioned in this
# list.
changed_settings = []
# rdiff-backup will try to checkpoint its state every
# checkpoint_interval seconds. Then when resuming, at most this
# amount of time is lost.
checkpoint_interval = 20
# The RPath of the rdiff-backup-data directory.
rbdir = None
# Indicates if a resume or a lack of resume is forced. This
# should be None for the default. 0 means don't resume, and 1
# means resume.
resume = None
# If there has been an aborted backup fewer than this many seconds
# ago, attempt to resume it where it left off instead of starting
# a new one.
resume_window = 7200
# This string is used when recognizing and creating time strings.
# If the time_separator is ":", then W3 datetime strings like
# 2001-12-07T04:22:01-07:00 are produced. It can be set to "_" to
# make filenames that don't contain colons, which aren't allowed
# under MS windows NT.
time_separator = ":"
# quoting_enabled is true if we should quote certain characters in
# filenames on the source side (see FilenameMapping for more
# info). chars_to_quote is a string whose characters should be
# quoted, and quoting_char is the character to quote with.
quoting_enabled = None
chars_to_quote = ""
quoting_char = ';'
# If true, emit output intended to be easily readable by a
# computer. False means output is intended for humans.
parsable_output = None
# If true, then hardlinks will be preserved to mirror and recorded
# in the increments directory. There is also a difference here
# between None and 0. When restoring, None or 1 means to preserve
# hardlinks iff can find a hardlink dictionary. 0 means ignore
# hardlink information regardless.
preserve_hardlinks = 1
# If this is false, then rdiff-backup will not compress any
# increments. Default is to compress based on regexp below.
compression = 1
# Increments based on files whose names match this
# case-insensitive regular expression won't be compressed (applies
# to .snapshots and .diffs). The second below will be the
# compiled version of the first.
no_compression_regexp_string = "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
"jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp = None
# If true, filelists and directory statistics will be split on
# nulls instead of newlines.
null_separator = None
# Determines whether or not ssh will be run with the -C switch
ssh_compression = 1
# If true, print statistics after successful backup
print_statistics = None
# On the reader and writer connections, the following will be
# replaced by the source and mirror Select objects respectively.
select_source, select_mirror = None, None
# On the backup writer connection, holds the root incrementing branch
# object. Access is provided to increment error counts.
ITRB = None
# Percentage of time to spend sleeping. None means never sleep.
sleep_ratio = None
# security_level has 4 values and controls which requests from remote
# systems will be honored. "all" means anything goes. "read-only"
# means that the requests must not write to disk. "update-only" means
# that requests shouldn't destructively update the disk (but normal
# incremental updates are OK). "minimal" means only listen to a few
# basic requests.
security_level = "all"
# If this is set, it indicates that the remote connection should only
# deal with paths inside of restrict_path.
restrict_path = None
def get(name):
"""Return the value of something in this module"""
return globals()[name]
def is_not_None(name):
"""Returns true if value is not None"""
return globals()[name] is not None
def set(name, val):
"""Set the value of something in this module
Use this instead of writing the values directly if the setting
matters to remote sides. This function updates the
changed_settings list, so other connections know to copy the
changes.
"""
changed_settings.append(name)
globals()[name] = val
def set_integer(name, val):
"""Like set, but make sure val is an integer"""
try: intval = int(val)
except ValueError:
Log.FatalError("Variable %s must be set to an integer -\n"
"received %s instead." % (name, val))
set(name, intval)
def set_float(name, val, min = None, max = None, inclusive = 1):
"""Like set, but make sure val is float within given bounds"""
def error():
s = "Variable %s must be set to a float" % (name,)
if min is not None and max is not None:
s += " between %s and %s " % (min, max)
if inclusive: s += "inclusive"
else: s += "not inclusive"
elif min is not None or max is not None:
if inclusive: inclusive_string = "or equal to "
else: inclusive_string = ""
if min is not None:
s += " greater than %s%s" % (inclusive_string, min)
else: s+= " less than %s%s" % (inclusive_string, max)
Log.FatalError(s)
try: f = float(val)
except ValueError: error()
if min is not None:
if inclusive and f < min: error()
elif not inclusive and f <= min: error()
if max is not None:
if inclusive and f > max: error()
elif not inclusive and f >= max: error()
set(name, f)
def get_dict_val(name, key):
"""Return val from dictionary in this class"""
return globals()[name][key]
def set_dict_val(name, key, val):
"""Set value for dictionary in this class"""
globals()[name][key] = val
def postset_regexp(name, re_string, flags = None):
"""Compile re_string on all existing connections, set to name"""
for conn in connections:
conn.Globals.postset_regexp_local(name, re_string, flags)
def postset_regexp_local(name, re_string, flags):
"""Set name to compiled re_string locally"""
if flags: globals()[name] = re.compile(re_string, flags)
else: globals()[name] = re.compile(re_string)
def set_select(dsrpath, tuplelist, quote_mode, *filelists):
"""Initialize select object using tuplelist
Note that each list in filelists must each be passed as
separate arguments, so each is recognized as a file by the
connection. Otherwise we will get an error because a list
containing files can't be pickled.
"""
global select_source, select_mirror
if dsrpath.source:
select_source = Select(dsrpath, quote_mode)
select_source.ParseArgs(tuplelist, filelists)
else:
select_mirror = Select(dsrpath, quote_mode)
select_mirror.ParseArgs(tuplelist, filelists)
from rpath import * # kludge to avoid circularity - not needed in this module
from selection import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Preserve and restore hard links
If the preserve_hardlinks option is selected, linked files in the
source directory will be linked in the mirror directory. Linked files
are treated like any other with respect to incrementing, but a
database of all links will be recorded at each session, so linked
files can still be restored from the increments.
All these functions are meant to be executed on the destination
side. The source side should only transmit inode information.
"""
from __future__ import generators
import cPickle
# In all of these lists of indicies are the values. The keys in
# _inode_ ones are (inode, devloc) pairs.
_src_inode_indicies = {}
_dest_inode_indicies = {}
# The keys for these two are just indicies. They share values
# with the earlier dictionaries.
_src_index_indicies = {}
_dest_index_indicies = {}
# When a linked file is restored, its path is added to this dict,
# so it can be found when later paths being restored are linked to
# it.
_restore_index_path = {}
def get_inode_key(rorp):
"""Return rorp's key for _inode_ dictionaries"""
return (rorp.getinode(), rorp.getdevloc())
def get_indicies(rorp, source):
"""Return a list of similarly linked indicies, using rorp's index"""
if source: dict = _src_index_indicies
else: dict = _dest_index_indicies
try: return dict[rorp.index]
except KeyError: return []
def add_rorp(rorp, source):
"""Process new rorp and update hard link dictionaries
First enter it into src_inode_indicies. If we have already
seen all the hard links, then we can delete the entry.
Everything must stay recorded in src_index_indicies though.
"""
if not rorp.isreg() or rorp.getnumlinks() < 2: return
if source:
inode_dict, index_dict = _src_inode_indicies, _src_index_indicies
else: inode_dict, index_dict = _dest_inode_indicies, _dest_index_indicies
rp_inode_key = get_inode_key(rorp)
if inode_dict.has_key(rp_inode_key):
index_list = inode_dict[rp_inode_key]
index_list.append(rorp.index)
if len(index_list) == rorp.getnumlinks():
del inode_dict[rp_inode_key]
else: # make new entry in both src dicts
index_list = [rorp.index]
inode_dict[rp_inode_key] = index_list
index_dict[rorp.index] = index_list
def add_rorp_iter(iter, source):
"""Return new rorp iterator like iter that add_rorp's first"""
for rorp in iter:
add_rorp(rorp, source)
yield rorp
def rorp_eq(src_rorp, dest_rorp):
"""Compare hardlinked for equality
Two files may otherwise seem equal but be hardlinked in
different ways. This function considers them equal enough if
they have been hardlinked correctly to the previously seen
indicies.
"""
if not src_rorp.index == dest_rorp.index: return None
if (not src_rorp.isreg() or not dest_rorp.isreg() or
src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1):
return 1 # Hard links don't apply
src_index_list = get_indicies(src_rorp, 1)
dest_index_list = get_indicies(dest_rorp, None)
# If a list only has one element, then it is only hardlinked
# to itself so far, so that is not a genuine difference yet.
if not src_index_list or len(src_index_list) == 1:
return not dest_index_list or len(dest_index_list) == 1
if not dest_index_list or len(dest_index_list) == 1: return None
# Both index lists exist and are non-empty
return src_index_list == dest_index_list # they are always sorted
def islinked(rorp):
"""True if rorp's index is already linked to something on src side"""
return len(get_indicies(rorp, 1)) >= 2
def restore_link(index, rpath):
"""Restores a linked file by linking it
When restoring, all the hardlink data is already present, and
we can only link to something already written. In either
case, add to the _restore_index_path dict, so we know later
that the file is available for hard
linking.
Returns true if succeeded in creating rpath, false if must
restore rpath normally.
"""
if index not in _src_index_indicies: return None
for linked_index in _src_index_indicies[index]:
if linked_index in _restore_index_path:
srcpath = _restore_index_path[linked_index]
Log("Restoring %s by hard linking to %s" %
(rpath.path, srcpath), 6)
rpath.hardlink(srcpath)
return 1
_restore_index_path[index] = rpath.path
return None
def link_rp(src_rorp, dest_rpath, dest_root = None):
"""Make dest_rpath into a link analogous to that of src_rorp"""
if not dest_root: dest_root = dest_rpath # use base of dest_rpath
dest_link_rpath = RPath(dest_root.conn, dest_root.base,
get_indicies(src_rorp, 1)[0])
dest_rpath.hardlink(dest_link_rpath.path)
def write_linkdict(rpath, dict, compress = None):
"""Write link data to the rbdata dir
It is stored as the a big pickled dictionary dated to match
the current hardlinks.
"""
assert (Globals.isbackup_writer and
rpath.conn is Globals.local_connection)
tf = TempFileManager.new(rpath)
def init():
fp = tf.open("wb", compress)
cPickle.dump(dict, fp)
assert not fp.close()
tf.setdata()
Robust.make_tf_robustaction(init, (tf,), (rpath,)).execute()
def get_linkrp(data_rpath, time, prefix):
"""Return RPath of linkdata, or None if cannot find"""
for rp in map(data_rpath.append, data_rpath.listdir()):
if (rp.isincfile() and rp.getincbase_str() == prefix and
(rp.getinctype() == 'snapshot' or rp.getinctype() == 'data')
and Time.stringtotime(rp.getinctime()) == time):
return rp
return None
def get_linkdata(data_rpath, time, prefix = 'hardlink_data'):
"""Return index dictionary written by write_linkdata at time"""
rp = get_linkrp(data_rpath, time, prefix)
if not rp: return None
fp = rp.open("rb", rp.isinccompressed())
index_dict = cPickle.load(fp)
assert not fp.close()
return index_dict
def final_writedata():
"""Write final checkpoint data to rbdir after successful backup"""
global final_inc
if _src_index_indicies:
Log("Writing hard link data", 6)
if Globals.compression:
final_inc = Globals.rbdir.append("hardlink_data.%s.data.gz" %
Time.curtimestr)
else: final_inc = Globals.rbdir.append("hardlink_data.%s.data" %
Time.curtimestr)
write_linkdict(final_inc, _src_index_indicies, Globals.compression)
else: # no hardlinks, so writing unnecessary
final_inc = None
def retrieve_final(time):
"""Set source index dictionary from hardlink_data file if avail"""
global _src_index_indicies
hd = get_linkdata(Globals.rbdir, time)
if hd is None: return None
_src_index_indicies = hd
return 1
def final_checkpoint(data_rpath):
"""Write contents of the four dictionaries to the data dir
If rdiff-backup receives a fatal error, it may still be able
to save the contents of the four hard link dictionaries.
Because these dictionaries may be big, they are not saved
after every 20 seconds or whatever, but just at the end.
"""
Log("Writing intermediate hard link data to disk", 2)
src_inode_rp = data_rpath.append("hardlink_source_inode_checkpoint."
"%s.data" % Time.curtimestr)
src_index_rp = data_rpath.append("hardlink_source_index_checkpoint."
"%s.data" % Time.curtimestr)
dest_inode_rp = data_rpath.append("hardlink_dest_inode_checkpoint."
"%s.data" % Time.curtimestr)
dest_index_rp = data_rpath.append("hardlink_dest_index_checkpoint."
"%s.data" % Time.curtimestr)
for (rp, dict) in ((src_inode_rp, _src_inode_indicies),
(src_index_rp, _src_index_indicies),
(dest_inode_rp, _dest_inode_indicies),
(dest_index_rp, _dest_index_indicies)):
write_linkdict(rp, dict)
def retrieve_checkpoint(data_rpath, time):
"""Retrieve hardlink data from final checkpoint
Return true if the retrieval worked, false otherwise.
"""
global _src_inode_indicies, _src_index_indicies
global _dest_inode_indicies, _dest_index_indicies
try:
src_inode = get_linkdata(data_rpath, time,
"hardlink_source_inode_checkpoint")
src_index = get_linkdata(data_rpath, time,
"hardlink_source_index_checkpoint")
dest_inode = get_linkdata(data_rpath, time,
"hardlink_dest_inode_checkpoint")
dest_index = get_linkdata(data_rpath, time,
"hardlink_dest_index_checkpoint")
except cPickle.UnpicklingError:
Log("Unpickling Error", 2)
return None
if (src_inode is None or src_index is None or
dest_inode is None or dest_index is None): return None
_src_inode_indicies, _src_index_indicies = src_inode, src_index
_dest_inode_indicies, _dest_index_indicies = dest_inode, dest_index
return 1
def remove_all_checkpoints():
"""Remove all hardlink checkpoint information from directory"""
prefix_list = ["hardlink_source_inode_checkpoint",
"hardlink_source_index_checkpoint",
"hardlink_dest_inode_checkpoint",
"hardlink_dest_index_checkpoint"]
for rp in map(Globals.rbdir.append, Globals.rbdir.listdir()):
if (rp.isincfile() and rp.getincbase_str() in prefix_list and
(rp.getinctype() == 'snapshot' or rp.getinctype() == 'data')):
rp.delete()
from log import *
from robust import *
from rpath import *
import Globals, Time
This diff is collapsed.
#!/usr/bin/env python
"""Read component files of rdiff-backup, and glue them together after
removing unnecessary bits."""
import os
def mystrip(filename):
"""Open filename, read input, strip appropriately, and return contents"""
fp = open(filename, "r")
lines = fp.readlines()
fp.close()
i = 0
while(lines[i][:60] !=
"############################################################"):
i = i+1
return "".join(lines[i:]).strip() + "\n\n\n"
files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py",
"iterfile.py", "rdiff.py", "connection.py", "rpath.py",
"hardlink.py", "robust.py", "rorpiter.py",
"destructive_stepping.py", "selection.py",
"filename_mapping.py", "statistics.py", "increment.py",
"restore.py", "manage.py", "highlevel.py",
"setconnections.py", "main.py"]
os.system("cp header.py rdiff-backup")
outfp = open("rdiff-backup", "a")
for file in files:
outfp.write(mystrip(file))
outfp.close()
os.system("chmod 755 rdiff-backup")
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Misc statistics methods, pertaining to dir and session stat files"""
from statistics import *
# This is the RPath of the directory statistics file, and the
# associated open file. It will hold a line of statistics for
# each directory that is backed up.
_dir_stats_rp = None
_dir_stats_fp = None
# This goes at the beginning of the directory statistics file and
# explains the format.
_dir_stats_header = """# rdiff-backup directory statistics file
#
# Each line is in the following format:
# RelativeDirName %s
""" % " ".join(StatsObj.stat_file_attrs)
def open_dir_stats_file():
"""Open directory statistics file, write header"""
global _dir_stats_fp, _dir_stats_rp
assert not _dir_stats_fp, "Directory file already open"
if Globals.compression: suffix = "data.gz"
else: suffix = "data"
_dir_stats_rp = Inc.get_inc(Globals.rbdir.append("directory_statistics"),
Time.curtime, suffix)
if _dir_stats_rp.lstat():
Log("Warning, statistics file %s already exists, appending" %
_dir_stats_rp.path, 2)
_dir_stats_fp = _dir_stats_rp.open("ab", Globals.compression)
else: _dir_stats_fp = _dir_stats_rp.open("wb", Globals.compression)
_dir_stats_fp.write(_dir_stats_header)
def write_dir_stats_line(statobj, index):
"""Write info from statobj about rpath to statistics file"""
if Globals.null_separator:
_dir_stats_fp.write(statobj.get_stats_line(index, None) + "\0")
else: _dir_stats_fp.write(statobj.get_stats_line(index) + "\n")
def close_dir_stats_file():
"""Close directory statistics file if its open"""
global _dir_stats_fp
if _dir_stats_fp:
_dir_stats_fp.close()
_dir_stats_fp = None
def write_session_statistics(statobj):
"""Write session statistics into file, log"""
stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
Time.curtime, "data")
statobj.StartTime = Time.curtime
statobj.EndTime = time.time()
# include hardlink data and dir stats in size of increments
if Globals.preserve_hardlinks and Hardlink.final_inc:
# include hardlink data in size of increments
statobj.IncrementFiles += 1
statobj.IncrementFileSize += Hardlink.final_inc.getsize()
if _dir_stats_rp and _dir_stats_rp.lstat():
statobj.IncrementFiles += 1
statobj.IncrementFileSize += _dir_stats_rp.getsize()
statobj.write_stats_to_rp(stat_inc)
if Globals.print_statistics:
message = statobj.get_stats_logstring("Session statistics")
Log.log_to_file(message)
Globals.client_conn.sys.stdout.write(message)
from increment import *
import Hardlink
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Invoke rdiff utility to make signatures, deltas, or patch
All these operations should be done in a relatively safe manner using
RobustAction and the like.
"""
import os, librsync
class RdiffException(Exception): pass
def get_signature(rp):
"""Take signature of rpin file and return in file object"""
Log("Getting signature of %s" % rp.path, 7)
return librsync.SigFile(rp.open("rb"))
def get_delta_sigfileobj(sig_fileobj, rp_new):
"""Like get_delta but signature is in a file object"""
Log("Getting delta of %s with signature stream" % (rp_new.path,), 7)
return librsync.DeltaFile(sig_fileobj, rp_new.open("rb"))
def get_delta_sigrp(rp_signature, rp_new):
"""Take signature rp and new rp, return delta file object"""
Log("Getting delta of %s with signature %s" %
(rp_new.path, rp_signature.path), 7)
return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb"))
def write_delta_action(basis, new, delta, compress = None):
"""Return action writing delta which brings basis to new
If compress is true, the output of rdiff will be gzipped
before written to delta.
"""
delta_tf = TempFileManager.new(delta)
def init(): write_delta(basis, new, delta_tf, compress)
return Robust.make_tf_robustaction(init, delta_tf, delta)
def write_delta(basis, new, delta, compress = None):
"""Write rdiff delta which brings basis to new"""
Log("Writing delta %s from %s -> %s" %
(basis.path, new.path, delta.path), 7)
sigfile = librsync.SigFile(basis.open("rb"))
deltafile = librsync.DeltaFile(sigfile, new.open("rb"))
delta.write_from_fileobj(deltafile, compress)
def patch_action(rp_basis, rp_delta, rp_out = None, out_tf = None,
delta_compressed = None):
"""Return RobustAction which patches rp_basis with rp_delta
If rp_out is None, put output in rp_basis. Will use TempFile
out_tf it is specified. If delta_compressed is true, the
delta file will be decompressed before processing with rdiff.
"""
if not rp_out: rp_out = rp_basis
if not out_tf: out_tf = TempFileManager.new(rp_out)
def init():
rp_basis.conn.Rdiff.patch_local(rp_basis, rp_delta,
out_tf, delta_compressed)
out_tf.setdata()
return Robust.make_tf_robustaction(init, out_tf, rp_out)
def patch_local(rp_basis, rp_delta, outrp, delta_compressed = None):
"""Patch routine that must be run on rp_basis.conn
This is because librsync may need to seek() around in rp_basis,
and so needs a real file. Other rpaths can be remote.
"""
assert rp_basis.conn is Globals.local_connection
if delta_compressed: deltafile = rp_delta.open("rb", 1)
else: deltafile = rp_delta.open("rb")
sigfile = librsync.SigFile(rp_basis.open("rb"))
patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile)
outrp.write_from_fileobj(patchfile)
def patch_with_attribs_action(rp_basis, rp_delta, rp_out = None):
"""Like patch_action, but also transfers attributs from rp_delta"""
if not rp_out: rp_out = rp_basis
tf = TempFileManager.new(rp_out)
return Robust.chain_nested(patch_action(rp_basis, rp_delta, rp_out, tf),
Robust.copy_attribs_action(rp_delta, tf))
def copy_action(rpin, rpout):
"""Use rdiff to copy rpin to rpout, conserving bandwidth"""
if not rpin.isreg() or not rpout.isreg() or rpin.conn is rpout.conn:
# rdiff not applicable, fallback to regular copying
return Robust.copy_action(rpin, rpout)
Log("Rdiff copying %s to %s" % (rpin.path, rpout.path), 6)
out_tf = TempFileManager.new(rpout)
def init(): rpout.conn.Rdiff.copy_local(rpin, rpout, out_tf)
return Robust.make_tf_robustaction(init, out_tf, rpout)
def copy_local(rpin, rpout, rpnew):
"""Write rpnew == rpin using rpout as basis. rpout and rpnew local"""
assert rpnew.conn is rpout.conn is Globals.local_connection
sigfile = librsync.SigFile(rpout.open("rb"))
deltafile = rpin.conn.librsync.DeltaFile(sigfile, rpin.open("rb"))
rpnew.write_from_fileobj(librsync.PatchedFile(rpout.open("rb"), deltafile))
from log import *
from robust import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Functions to make sure remote requests are kosher"""
import sys, tempfile
import Globals, Main
from rpath import *
class Violation(Exception):
"""Exception that indicates an improper request has been received"""
pass
# This will store the list of functions that will be honored from
# remote connections.
allowed_requests = None
# This stores the list of global variables that the client can not
# set on the server.
disallowed_server_globals = ["server", "security_level", "restrict_path"]
def initialize(action, cmdpairs):
"""Initialize allowable request list and chroot"""
global allowed_requests
set_security_level(action, cmdpairs)
set_allowed_requests(Globals.security_level)
def set_security_level(action, cmdpairs):
"""If running client, set security level and restrict_path
To find these settings, we must look at the action to see what is
supposed to happen, and then look at the cmdpairs to see what end
the client is on.
"""
def islocal(cmdpair): return not cmdpair[0]
def bothlocal(cp1, cp2): return islocal(cp1) and islocal(cp2)
def bothremote(cp1, cp2): return not islocal(cp1) and not islocal(cp2)
def getpath(cmdpair): return cmdpair[1]
if Globals.server: return
cp1 = cmdpairs[0]
if len(cmdpairs) > 1: cp2 = cmdpairs[1]
if action == "backup":
if bothlocal(cp1, cp2) or bothremote(cp1, cp2):
sec_level = "minimal"
rdir = tempfile.gettempdir()
elif islocal(cp1):
sec_level = "read-only"
rdir = getpath(cp1)
else:
assert islocal(cp2)
sec_level = "update-only"
rdir = getpath(cp2)
elif action == "restore" or action == "restore-as-of":
if len(cmdpairs) == 1 or bothlocal(cp1, cp2) or bothremote(cp1, cp2):
sec_level = "minimal"
rdir = tempfile.gettempdir()
elif islocal(cp1):
sec_level = "read-only"
rdir = Main.restore_get_root(RPath(Globals.local_connection,
getpath(cp1)))[0].path
else:
assert islocal(cp2)
sec_level = "all"
rdir = getpath(cp2)
elif action == "mirror":
if bothlocal(cp1, cp2) or bothremote(cp1, cp2):
sec_level = "minimal"
rdir = tempfile.gettempdir()
elif islocal(cp1):
sec_level = "read-only"
rdir = getpath(cp1)
else:
assert islocal(cp2)
sec_level = "all"
rdir = getpath(cp2)
elif (action == "test-server" or action == "list-increments" or
action == "list-changed-since" or action ==
"calculate-average" or action == "remove-older-than"):
sec_level = "minimal"
rdir = tempfile.gettempdir()
else: assert 0, "Unknown action %s" % action
Globals.security_level = sec_level
Globals.restrict_path = RPath(Globals.local_connection,
rdir).normalize().path
def set_allowed_requests(sec_level):
"""Set the allowed requests list using the security level"""
global allowed_requests
if sec_level == "all": return
allowed_requests = ["VirtualFile.readfromid", "VirtualFile.closebyid",
"Globals.get", "Globals.is_not_None",
"Globals.get_dict_val",
"Log.open_logfile_allconn",
"Log.close_logfile_allconn",
"SetConnections.add_redirected_conn",
"RedirectedRun",
"sys.stdout.write"]
if sec_level == "minimal": pass
elif sec_level == "read-only" or sec_level == "update-only":
allowed_requests.extend(["C.make_file_dict",
"os.getuid",
"os.listdir",
"Time.setcurtime_local",
"Resume.ResumeCheck",
"HLSourceStruct.split_initial_dsiter",
"HLSourceStruct.get_diffs_and_finalize",
"RPathStatic.gzip_open_local_read",
"RPathStatic.open_local_read"])
if sec_level == "update-only":
allowed_requests. \
extend(["Log.open_logfile_local", "Log.close_logfile_local",
"Log.close_logfile_allconn", "Log.log_to_file",
"SaveState.init_filenames",
"SaveState.touch_last_file",
"HLDestinationStruct.get_sigs",
"HLDestinationStruct.patch_w_datadir_writes",
"HLDestinationStruct.patch_and_finalize",
"HLDestinationStruct.patch_increment_and_finalize",
"Main.backup_touch_curmirror_local",
"Globals.ITRB.increment_stat"])
if Globals.server:
allowed_requests.extend(["SetConnections.init_connection_remote",
"Log.setverbosity",
"Log.setterm_verbosity",
"Time.setprevtime_local",
"FilenameMapping.set_init_quote_vals_local",
"Globals.postset_regexp_local",
"Globals.set_select",
"HLSourceStruct.set_session_info",
"HLDestinationStruct.set_session_info"])
def vet_request(request, arglist):
"""Examine request for security violations"""
#if Globals.server: sys.stderr.write(str(request) + "\n")
security_level = Globals.security_level
if Globals.restrict_path:
for arg in arglist:
if isinstance(arg, RPath): vet_rpath(arg)
if security_level == "all": return
if request.function_string in allowed_requests: return
if request.function_string == "Globals.set":
if Globals.server and arglist[0] not in disallowed_server_globals:
return
raise Violation("\nWarning Security Violation!\n"
"Bad request for function: %s\n"
"with arguments: %s\n" % (request.function_string,
arglist))
def vet_rpath(rpath):
"""Require rpath not to step outside retricted directory"""
if Globals.restrict_path and rpath.conn is Globals.local_connection:
normalized, restrict = rpath.normalize().path, Globals.restrict_path
components = normalized.split("/")
# 3 cases for restricted dir /usr/foo: /var, /usr/foobar, /usr/foo/..
if (not normalized.startswith(restrict) or
(len(normalized) > len(restrict) and
normalized[len(restrict)] != "/") or
".." in components):
raise Violation("\nWarning Security Violation!\n"
"Request to handle path %s\n"
"which doesn't appear to be within "
"restrict path %s.\n" % (normalized, restrict))
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Parse args and setup connections
The functions in this module are used once by Main to parse file
descriptions like bescoto@folly.stanford.edu:/usr/bin/ls and to set up
the related connections.
"""
# This is the schema that determines how rdiff-backup will open a
# pipe to the remote system. If the file is given as A::B, %s will
# be substituted with A in the schema.
__cmd_schema = 'ssh -C %s rdiff-backup --server'
__cmd_schema_no_compress = 'ssh %s rdiff-backup --server'
# This is a list of remote commands used to start the connections.
# The first is None because it is the local connection.
__conn_remote_cmds = [None]
class SetConnectionsException(Exception): pass
def get_cmd_pairs(arglist, remote_schema = None, remote_cmd = None):
"""Map the given file descriptions into command pairs
Command pairs are tuples cmdpair with length 2. cmdpair[0] is
None iff it describes a local path, and cmdpair[1] is the path.
"""
global __cmd_schema
if remote_schema: __cmd_schema = remote_schema
elif not Globals.ssh_compression: __cmd_schema = __cmd_schema_no_compress
if not arglist: return []
desc_pairs = map(parse_file_desc, arglist)
if filter(lambda x: x[0], desc_pairs): # True if any host_info found
if remote_cmd:
Log.FatalError("The --remote-cmd flag is not compatible "
"with remote file descriptions.")
elif remote_schema:
Log("Remote schema option ignored - no remote file "
"descriptions.", 2)
cmdpairs = map(desc2cmd_pairs, desc_pairs)
if remote_cmd: # last file description gets remote_cmd
cmd_pairs[-1] = (remote_cmd, cmd_pairs[-1][1])
return cmdpairs
def cmdpair2rp(cmd_pair):
"""Return normalized RPath from cmd_pair (remote_cmd, filename)"""
cmd, filename = cmd_pair
if cmd: conn = init_connection(cmd)
else: conn = Globals.local_connection
return RPath(conn, filename).normalize()
def desc2cmd_pairs(desc_pair):
"""Return pair (remote_cmd, filename) from desc_pair"""
host_info, filename = desc_pair
if not host_info: return (None, filename)
else: return (fill_schema(host_info), filename)
def parse_file_desc(file_desc):
"""Parse file description returning pair (host_info, filename)
In other words, bescoto@folly.stanford.edu::/usr/bin/ls =>
("bescoto@folly.stanford.edu", "/usr/bin/ls"). The
complication is to allow for quoting of : by a \. If the
string is not separated by :, then the host_info is None.
"""
def check_len(i):
if i >= len(file_desc):
raise SetConnectionsException(
"Unexpected end to file description %s" % file_desc)
host_info_list, i, last_was_quoted = [], 0, None
while 1:
if i == len(file_desc):
return (None, file_desc)
if file_desc[i] == '\\':
i = i+1
check_len(i)
last_was_quoted = 1
elif (file_desc[i] == ":" and i > 0 and file_desc[i-1] == ":"
and not last_was_quoted):
host_info_list.pop() # Remove last colon from name
break
else: last_was_quoted = None
host_info_list.append(file_desc[i])
i = i+1
check_len(i+1)
return ("".join(host_info_list), file_desc[i+1:])
def fill_schema(host_info):
"""Fills host_info into the schema and returns remote command"""
return __cmd_schema % host_info
def init_connection(remote_cmd):
"""Run remote_cmd, register connection, and then return it
If remote_cmd is None, then the local connection will be
returned. This also updates some settings on the remote side,
like global settings, its connection number, and verbosity.
"""
if not remote_cmd: return Globals.local_connection
Log("Executing " + remote_cmd, 4)
stdin, stdout = os.popen2(remote_cmd)
conn_number = len(Globals.connections)
conn = PipeConnection(stdout, stdin, conn_number)
check_connection_version(conn, remote_cmd)
Log("Registering connection %d" % conn_number, 7)
init_connection_routing(conn, conn_number, remote_cmd)
init_connection_settings(conn)
return conn
def check_connection_version(conn, remote_cmd):
"""Log warning if connection has different version"""
try: remote_version = conn.Globals.get('version')
except ConnectionReadError, exception:
Log.FatalError("""%s
Couldn't start up the remote connection by executing
%s
Remember that, under the default settings, rdiff-backup must be
installed in the PATH on the remote system. See the man page for more
information.""" % (exception, remote_cmd))
if remote_version != Globals.version:
Log("Warning: Local version %s does not match remote version %s."
% (Globals.version, remote_version), 2)
def init_connection_routing(conn, conn_number, remote_cmd):
"""Called by init_connection, establish routing, conn dict"""
Globals.connection_dict[conn_number] = conn
conn.SetConnections.init_connection_remote(conn_number)
for other_remote_conn in Globals.connections[1:]:
conn.SetConnections.add_redirected_conn(
other_remote_conn.conn_number)
other_remote_conn.SetConnections.add_redirected_conn(conn_number)
Globals.connections.append(conn)
__conn_remote_cmds.append(remote_cmd)
def init_connection_settings(conn):
"""Tell new conn about log settings and updated globals"""
conn.Log.setverbosity(Log.verbosity)
conn.Log.setterm_verbosity(Log.term_verbosity)
for setting_name in Globals.changed_settings:
conn.Globals.set(setting_name, Globals.get(setting_name))
FilenameMapping.set_init_quote_vals()
def init_connection_remote(conn_number):
"""Run on server side to tell self that have given conn_number"""
Globals.connection_number = conn_number
Globals.local_connection.conn_number = conn_number
Globals.connection_dict[0] = Globals.connections[1]
Globals.connection_dict[conn_number] = Globals.local_connection
def add_redirected_conn(conn_number):
"""Run on server side - tell about redirected connection"""
Globals.connection_dict[conn_number] = \
RedirectedConnection(conn_number)
def UpdateGlobal(setting_name, val):
"""Update value of global variable across all connections"""
for conn in Globals.connections:
conn.Globals.set(setting_name, val)
def BackupInitConnections(reading_conn, writing_conn):
"""Backup specific connection initialization"""
reading_conn.Globals.set("isbackup_reader", 1)
writing_conn.Globals.set("isbackup_writer", 1)
UpdateGlobal("backup_reader", reading_conn)
UpdateGlobal("backup_writer", writing_conn)
if (Globals.change_source_perms and
reading_conn.Globals.get("process_uid") == 0):
Log("Warning: --change_source_perms should usually not be used when\n"
"the reading connection is running as root, because root can\n"
"read all files regardless of their permissions.", 2)
def CloseConnections():
"""Close all connections. Run by client"""
assert not Globals.server
for conn in Globals.connections: conn.quit()
del Globals.connections[1:] # Only leave local connection
Globals.connection_dict = {0: Globals.local_connection}
Globals.backup_reader = Globals.isbackup_reader = \
Globals.backup_writer = Globals.isbackup_writer = None
def TestConnections():
"""Test connections, printing results"""
if len(Globals.connections) == 1: print "No remote connections specified"
else:
for i in range(1, len(Globals.connections)): test_connection(i)
def test_connection(conn_number):
"""Test connection. conn_number 0 is the local connection"""
print "Testing server started by: ", __conn_remote_cmds[conn_number]
conn = Globals.connections[conn_number]
try:
assert conn.pow(2,3) == 8
assert conn.os.path.join("a", "b") == "a/b"
version = conn.reval("lambda: Globals.version")
except:
sys.stderr.write("Server tests failed\n")
raise
if not version == Globals.version:
print """Server may work, but there is a version mismatch:
Local version: %s
Remote version: %s""" % (Globals.version, version)
else: print "Server OK"
from log import *
from rpath import *
from connection import *
import Globals, FilenameMapping
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Provide time related exceptions and functions"""
import time, types, re
import Globals
class TimeException(Exception): pass
_interval_conv_dict = {"s": 1, "m": 60, "h": 3600, "D": 86400,
"W": 7*86400, "M": 30*86400, "Y": 365*86400}
_integer_regexp = re.compile("^[0-9]+$")
_interval_regexp = re.compile("^([0-9]+)([smhDWMY])")
_genstr_date_regexp1 = re.compile("^(?P<year>[0-9]{4})[-/]"
"(?P<month>[0-9]{1,2})[-/](?P<day>[0-9]{1,2})$")
_genstr_date_regexp2 = re.compile("^(?P<month>[0-9]{1,2})[-/]"
"(?P<day>[0-9]{1,2})[-/](?P<year>[0-9]{4})$")
curtime = curtimestr = None
been_awake_since = None # stores last time sleep() was run
def setcurtime(curtime = None):
"""Sets the current time in curtime and curtimestr on all systems"""
t = curtime or time.time()
for conn in Globals.connections:
conn.Time.setcurtime_local(t)
def setcurtime_local(timeinseconds):
"""Only set the current time locally"""
global curtime, curtimestr
curtime, curtimestr = timeinseconds, timetostring(timeinseconds)
def setprevtime(timeinseconds):
"""Sets the previous inc time in prevtime and prevtimestr"""
assert timeinseconds > 0, timeinseconds
timestr = timetostring(timeinseconds)
for conn in Globals.connections:
conn.Time.setprevtime_local(timeinseconds, timestr)
def setprevtime_local(timeinseconds, timestr):
"""Like setprevtime but only set the local version"""
global prevtime, prevtimestr
prevtime, prevtimestr = timeinseconds, timestr
def timetostring(timeinseconds):
"""Return w3 datetime compliant listing of timeinseconds"""
return time.strftime("%Y-%m-%dT%H" + Globals.time_separator +
"%M" + Globals.time_separator + "%S",
time.localtime(timeinseconds)) + gettzd()
def stringtotime(timestring):
"""Return time in seconds from w3 timestring
If there is an error parsing the string, or it doesn't look
like a w3 datetime string, return None.
"""
try:
date, daytime = timestring[:19].split("T")
year, month, day = map(int, date.split("-"))
hour, minute, second = map(int,
daytime.split(Globals.time_separator))
assert 1900 < year < 2100, year
assert 1 <= month <= 12
assert 1 <= day <= 31
assert 0 <= hour <= 23
assert 0 <= minute <= 59
assert 0 <= second <= 61 # leap seconds
timetuple = (year, month, day, hour, minute, second, -1, -1, -1)
if time.daylight:
utc_in_secs = time.mktime(timetuple) - time.altzone
else: utc_in_secs = time.mktime(timetuple) - time.timezone
return long(utc_in_secs) + tzdtoseconds(timestring[19:])
except (TypeError, ValueError, AssertionError): return None
def timetopretty(timeinseconds):
"""Return pretty version of time"""
return time.asctime(time.localtime(timeinseconds))
def stringtopretty(timestring):
"""Return pretty version of time given w3 time string"""
return timetopretty(stringtotime(timestring))
def inttopretty(seconds):
"""Convert num of seconds to readable string like "2 hours"."""
partlist = []
hours, seconds = divmod(seconds, 3600)
if hours > 1: partlist.append("%d hours" % hours)
elif hours == 1: partlist.append("1 hour")
minutes, seconds = divmod(seconds, 60)
if minutes > 1: partlist.append("%d minutes" % minutes)
elif minutes == 1: partlist.append("1 minute")
if seconds == 1: partlist.append("1 second")
elif not partlist or seconds > 1:
if isinstance(seconds, int) or isinstance(seconds, long):
partlist.append("%s seconds" % seconds)
else: partlist.append("%.2f seconds" % seconds)
return " ".join(partlist)
def intstringtoseconds(interval_string):
"""Convert a string expressing an interval (e.g. "4D2s") to seconds"""
def error():
raise TimeException("""Bad interval string "%s"
Intervals are specified like 2Y (2 years) or 2h30m (2.5 hours). The
allowed special characters are s, m, h, D, W, M, and Y. See the man
page for more information.
""" % interval_string)
if len(interval_string) < 2: error()
total = 0
while interval_string:
match = _interval_regexp.match(interval_string)
if not match: error()
num, ext = int(match.group(1)), match.group(2)
if not ext in _interval_conv_dict or num < 0: error()
total += num*_interval_conv_dict[ext]
interval_string = interval_string[match.end(0):]
return total
def gettzd():
"""Return w3's timezone identification string.
Expresed as [+/-]hh:mm. For instance, PST is -08:00. Zone is
coincides with what localtime(), etc., use.
"""
if time.daylight: offset = -1 * time.altzone/60
else: offset = -1 * time.timezone/60
if offset > 0: prefix = "+"
elif offset < 0: prefix = "-"
else: return "Z" # time is already in UTC
hours, minutes = map(abs, divmod(offset, 60))
assert 0 <= hours <= 23
assert 0 <= minutes <= 59
return "%s%02d%s%02d" % (prefix, hours,
Globals.time_separator, minutes)
def tzdtoseconds(tzd):
"""Given w3 compliant TZD, return how far ahead UTC is"""
if tzd == "Z": return 0
assert len(tzd) == 6 # only accept forms like +08:00 for now
assert (tzd[0] == "-" or tzd[0] == "+") and \
tzd[3] == Globals.time_separator
return -60 * (60 * int(tzd[:3]) + int(tzd[4:]))
def cmp(time1, time2):
"""Compare time1 and time2 and return -1, 0, or 1"""
if type(time1) is types.StringType:
time1 = stringtotime(time1)
assert time1 is not None
if type(time2) is types.StringType:
time2 = stringtotime(time2)
assert time2 is not None
if time1 < time2: return -1
elif time1 == time2: return 0
else: return 1
def sleep(sleep_ratio):
"""Sleep for period to maintain given sleep_ratio
On my system sleeping for periods less than 1/20th of a second
doesn't seem to work very accurately, so accumulate at least that
much time before sleeping.
"""
global been_awake_since
if been_awake_since is None: # first running
been_awake_since = time.time()
else:
elapsed_time = time.time() - been_awake_since
sleep_time = elapsed_time * (sleep_ratio/(1-sleep_ratio))
if sleep_time >= 0.05:
time.sleep(sleep_time)
been_awake_since = time.time()
def genstrtotime(timestr, curtime = None):
"""Convert a generic time string to a time in seconds"""
if curtime is None: curtime = globals()['curtime']
if timestr == "now": return curtime
def error():
raise TimeException("""Bad time string "%s"
The acceptible time strings are intervals (like "3D64s"), w3-datetime
strings, like "2002-04-26T04:22:01-07:00" (strings like
"2002-04-26T04:22:01" are also acceptable - rdiff-backup will use the
current time zone), or ordinary dates like 2/4/1997 or 2001-04-23
(various combinations are acceptable, but the month always precedes
the day).""" % timestr)
# Test for straight integer
if _integer_regexp.search(timestr): return int(timestr)
# Test for w3-datetime format, possibly missing tzd
t = stringtotime(timestr) or stringtotime(timestr+gettzd())
if t: return t
try: # test for an interval, like "2 days ago"
return curtime - intstringtoseconds(timestr)
except TimeException: pass
# Now check for dates like 2001/3/23
match = _genstr_date_regexp1.search(timestr) or \
_genstr_date_regexp2.search(timestr)
if not match: error()
timestr = "%s-%02d-%02dT00:00:00%s" % (match.group('year'),
int(match.group('month')), int(match.group('day')), gettzd())
t = stringtotime(timestr)
if t: return t
else: error()
This diff is collapsed.
/* ----------------------------------------------------------------------- *
*
* Copyright 2002 Ben Escoto
*
* This file is part of rdiff-backup.
*
* rdiff-backup is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, Inc., 675 Mass Ave,
* Cambridge MA 02139, USA; either version 2 of the License, or (at
* your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#include <Python.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
/* choose the appropriate stat and fstat functions and return structs */
/* This code taken from Python's posixmodule.c */
#undef STAT
#if defined(MS_WIN64) || defined(MS_WIN32)
# define STAT _stati64
# define FSTAT _fstati64
# define STRUCT_STAT struct _stati64
#else
# define STAT stat
# define FSTAT fstat
# define STRUCT_STAT struct stat
#endif
static PyObject *UnknownFileTypeError;
static PyObject *c_make_file_dict(PyObject *self, PyObject *args);
static PyObject *long2str(PyObject *self, PyObject *args);
static PyObject *str2long(PyObject *self, PyObject *args);
/* Turn a stat structure into a python dictionary. The preprocessor
stuff taken from Python's posixmodule.c */
static PyObject *c_make_file_dict(self, args)
PyObject *self;
PyObject *args;
{
PyObject *size, *inode, *mtime, *atime, *devloc, *return_val;
char *filename, filetype[5];
STRUCT_STAT sbuf;
long int mode, perms;
int res;
if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
Py_BEGIN_ALLOW_THREADS
res = lstat(filename, &sbuf);
Py_END_ALLOW_THREADS
if (res != 0) {
if (errno == ENOENT || errno == ENOTDIR)
return Py_BuildValue("{s:s}", "type", NULL);
else {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
return NULL;
}
}
#ifdef HAVE_LARGEFILE_SUPPORT
size = PyLong_FromLongLong((LONG_LONG)sbuf.st_size);
inode = PyLong_FromLongLong((LONG_LONG)sbuf.st_ino);
#else
size = PyInt_FromLong(sbuf.st_size);
inode = PyInt_FromLong((long)sbuf.st_ino);
#endif
mode = (long)sbuf.st_mode;
perms = mode & 07777;
#if defined(HAVE_LONG_LONG) && !defined(MS_WINDOWS)
devloc = PyLong_FromLongLong((LONG_LONG)sbuf.st_dev);
#else
devloc = PyInt_FromLong((long)sbuf.st_dev);
#endif
#if SIZEOF_TIME_T > SIZEOF_LONG
mtime = PyLong_FromLongLong((LONG_LONG)sbuf.st_mtime);
atime = PyLong_FromLongLong((LONG_LONG)sbuf.st_atime);
#else
mtime = PyInt_FromLong((long)sbuf.st_mtime);
atime = PyInt_FromLong((long)sbuf.st_atime);
#endif
/* Build return dictionary from stat struct */
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
/* Regular files, directories, sockets, and fifos */
if S_ISREG(mode) strcpy(filetype, "reg");
else if S_ISDIR(mode) strcpy(filetype, "dir");
else if S_ISSOCK(mode) strcpy(filetype, "sock");
else strcpy(filetype, "fifo");
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:O,s:O}",
"type", filetype,
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"mtime", mtime,
"atime", atime);
} else if S_ISLNK(mode) {
/* Symbolic links */
char linkname[1024];
int len_link = readlink(filename, linkname, 1023);
if (len_link < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return_val = NULL;
} else {
linkname[len_link] = '\0';
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:s}",
"type", "sym",
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"linkname", linkname);
}
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
/* Device files */
char devtype[2];
#if defined(HAVE_LONG_LONG) && !defined(MS_WINDOWS)
LONG_LONG devnums = (LONG_LONG)sbuf.st_rdev;
PyObject *major_num = PyLong_FromLongLong(major(devnums));
#else
long int devnums = (long)sbuf.st_dev;
PyObject *major_num = PyInt_FromLong(devnums >> 8);
#endif
int minor_num = (int)(minor(devnums));
if S_ISCHR(mode) strcpy(devtype, "c");
else strcpy(devtype, "b");
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:N}",
"type", "dev",
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"devnums", Py_BuildValue("(s,O,i)", devtype,
major_num, minor_num));
Py_DECREF(major_num);
} else {
/* Unrecognized file type - raise exception */
PyErr_SetString(UnknownFileTypeError, filename);
return_val = NULL;
}
Py_DECREF(size);
Py_DECREF(inode);
Py_DECREF(devloc);
Py_DECREF(mtime);
Py_DECREF(atime);
return return_val;
}
/* Convert python long into 7 byte string */
static PyObject *long2str(self, args)
PyObject *self;
PyObject *args;
{
unsigned char s[7];
PyLongObject *pylong;
PyObject *return_val;
if (!PyArg_ParseTuple(args, "O!", &PyLong_Type, &pylong)) return NULL;
if (_PyLong_AsByteArray(pylong, s, 7, 0, 0) != 0) return NULL;
else return Py_BuildValue("s#", s, 7);
return return_val;
}
/* Reverse of above; convert 7 byte string into python long */
static PyObject *str2long(self, args)
PyObject *self;
PyObject *args;
{
unsigned char *s;
int ssize;
if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
if (ssize != 7) {
PyErr_SetString(PyExc_TypeError, "Single argument must be 7 char string");
return NULL;
}
return _PyLong_FromByteArray(s, 7, 0, 0);
}
static PyMethodDef CMethods[] = {
{"make_file_dict", c_make_file_dict, METH_VARARGS,
"Make dictionary from file stat"},
{"long2str", long2str, METH_VARARGS, "Convert python long to 7 byte string"},
{"str2long", str2long, METH_VARARGS, "Convert 7 byte string to python long"},
{NULL, NULL, 0, NULL}
};
void initC(void)
{
PyObject *m, *d;
m = Py_InitModule("C", CMethods);
d = PyModule_GetDict(m);
UnknownFileTypeError = PyErr_NewException("C.UnknownFileTypeError",
NULL, NULL);
PyDict_SetItemString(d, "UnknownFileTypeError", UnknownFileTypeError);
}
#!/usr/bin/env python
import sys, os
from distutils.core import setup, Extension
assert len(sys.argv) == 1
sys.argv.append("build")
setup(name="CModule",
version="0.9.0",
description="rdiff-backup's C component",
ext_modules=[Extension("C", ["cmodule.c"]),
Extension("_librsync", ["_librsyncmodule.c"],
libraries=["rsync"])])
assert not os.system("mv build/lib.linux-i686-2.2/C.so .")
assert not os.system("mv build/lib.linux-i686-2.2/_librsync.so .")
assert not os.system("rm -rf build")
This diff is collapsed.
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Deal with side effects from traversing trees"""
from __future__ import generators
import types
from rpath import *
from lazy import *
class DSRPPermError(Exception):
"""Exception used when a DSRPath can't get sufficient permissions"""
pass
class DSRPath(RPath):
"""Destructive Stepping RPath
Sometimes when we traverse the directory tree, even when we just
want to read files, we have to change things, like the permissions
of a file or directory in order to read it, or the file's access
times. This class is like an RPath, but the permission and time
modifications are delayed, so that they can be done at the very
end when they won't be disturbed later.
Here are the new class variables:
delay_perms - true iff future perm changes should be delayed
newperms - holds the perm values while they are delayed
delay_atime - true iff some atime change are being delayed
newatime - holds the new atime
delay_mtime - true if some mtime change is being delayed
newmtime - holds the new mtime
"""
def __init__(self, source, conn_or_rp, base = 0, index = ()):
"""Initialize DSRP
Source should be true iff the DSRPath is taken from the
"source" partition and thus settings like
Globals.change_source_perms should be paid attention to.
If args is [rpath], return the dsrpath equivalent of rpath,
otherwise use the same arguments as the RPath initializer.
"""
if base == 0:
assert isinstance(conn_or_rp, RPath)
RPath.__init__(self, conn_or_rp.conn,
conn_or_rp.base, conn_or_rp.index)
self.path = conn_or_rp.path # conn_or_rp may be quoted
else: RPath.__init__(self, conn_or_rp, base, index)
if source != "bypass":
# "bypass" val is used when unpackaging over connection
assert source is None or source is 1
self.source = source
self.set_delays(source)
self.set_init_perms(source)
def set_delays(self, source):
"""Delay writing permissions and times where appropriate"""
if not source or Globals.change_source_perms:
self.delay_perms, self.newperms = 1, None
else: self.delay_perms = None
if Globals.preserve_atime:
self.delay_atime = 1
# Now get atime right away if possible
if self.data.has_key('atime'): self.newatime = self.data['atime']
else: self.newatime = None
else: self.delay_atime = None
if source:
self.delay_mtime = None # we'll never change mtime of source file
else:
self.delay_mtime = 1
# Save mtime now for a dir, because it might inadvertantly change
if self.isdir(): self.newmtime = self.data['mtime']
else: self.newmtime = None
def set_init_perms(self, source):
"""If necessary, change permissions to ensure access"""
if self.isreg() and not self.readable():
if (source and Globals.change_source_perms or
not source and Globals.change_mirror_perms):
self.chmod_bypass(0400)
elif self.isdir():
if source and Globals.change_source_perms:
if not self.readable() or not self.executable():
self.chmod_bypass(0500)
elif not source and Globals.change_mirror_perms:
if not self.hasfullperms(): self.chmod_bypass(0700)
def warn(self, err):
Log("Received error '%s' when dealing with file %s, skipping..."
% (err, self.path), 1)
raise DSRPPermError(self.path)
def __getstate__(self):
"""Return picklable state. See RPath __getstate__."""
assert self.conn is Globals.local_connection # Can't pickle a conn
return self.getstatedict()
def getstatedict(self):
"""Return dictionary containing the attributes we can save"""
pickle_dict = {}
for attrib in ['index', 'data', 'delay_perms', 'newperms',
'delay_atime', 'newatime',
'delay_mtime', 'newmtime',
'path', 'base', 'source']:
if self.__dict__.has_key(attrib):
pickle_dict[attrib] = self.__dict__[attrib]
return pickle_dict
def __setstate__(self, pickle_dict):
"""Set state from object produced by getstate"""
self.conn = Globals.local_connection
for attrib in pickle_dict.keys():
self.__dict__[attrib] = pickle_dict[attrib]
def chmod(self, permissions):
"""Change permissions, delaying if self.perms_delayed is set"""
if self.delay_perms: self.newperms = self.data['perms'] = permissions
else: RPath.chmod(self, permissions)
def getperms(self):
"""Return dsrp's intended permissions"""
if self.delay_perms and self.newperms is not None:
return self.newperms
else: return self.data['perms']
def chmod_bypass(self, permissions):
"""Change permissions without updating the data dictionary"""
self.delay_perms = 1
if self.newperms is None: self.newperms = self.getperms()
Log("DSRP: Perm bypass %s to %o" % (self.path, permissions), 8)
self.conn.os.chmod(self.path, permissions)
def settime(self, accesstime, modtime):
"""Change times, delaying if self.times_delayed is set"""
if self.delay_atime: self.newatime = self.data['atime'] = accesstime
if self.delay_mtime: self.newmtime = self.data['mtime'] = modtime
if not self.delay_atime or not self.delay_mtime:
RPath.settime(self, accesstime, modtime)
def setmtime(self, modtime):
"""Change mtime, delaying if self.times_delayed is set"""
if self.delay_mtime: self.newmtime = self.data['mtime'] = modtime
else: RPath.setmtime(self, modtime)
def getmtime(self):
"""Return dsrp's intended modification time"""
if self.delay_mtime and self.newmtime is not None:
return self.newmtime
else: return self.data['mtime']
def getatime(self):
"""Return dsrp's intended access time"""
if self.delay_atime and self.newatime is not None:
return self.newatime
else: return self.data['atime']
def write_changes(self):
"""Write saved up permission/time changes"""
if not self.lstat(): return # File has been deleted in meantime
if self.delay_perms and self.newperms is not None:
Log("Finalizing permissions of dsrp %s to %s" %
(self.path, self.newperms), 8)
RPath.chmod(self, self.newperms)
do_atime = self.delay_atime and self.newatime is not None
do_mtime = self.delay_mtime and self.newmtime is not None
if do_atime and do_mtime:
RPath.settime(self, self.newatime, self.newmtime)
elif do_atime and not do_mtime:
RPath.settime(self, self.newatime, self.getmtime())
elif not do_atime and do_mtime:
RPath.setmtime(self, self.newmtime)
def newpath(self, newpath, index = ()):
"""Return similar DSRPath but with new path"""
return self.__class__(self.source, self.conn, newpath, index)
def append(self, ext):
"""Return similar DSRPath with new extension"""
return self.__class__(self.source, self.conn, self.base,
self.index + (ext,))
def new_index(self, index):
"""Return similar DSRPath with new index"""
return self.__class__(self.source, self.conn, self.base, index)
class DestructiveSteppingFinalizer(ITRBranch):
"""Finalizer that can work on an iterator of dsrpaths
The reason we have to use an IterTreeReducer is that some files
should be updated immediately, but for directories we sometimes
need to update all the files in the directory before finally
coming back to it.
"""
dsrpath = None
def start_process(self, index, dsrpath):
self.dsrpath = dsrpath
def end_process(self):
if self.dsrpath: self.dsrpath.write_changes()
def can_fast_process(self, index, dsrpath):
return not self.dsrpath.isdir()
def fast_process(self, index, dsrpath):
if self.dsrpath: self.dsrpath.write_changes()
from log import *
from robust import *
import Globals
from __future__ import generators
execfile("manage.py")
#######################################################################
#
# filelist - Some routines that help with operations over files listed
# in standard input instead of over whole directories.
#
class FilelistError(Exception): pass
class Filelist:
"""Many of these methods have analogs in highlevel.py"""
def File2Iter(fp, baserp):
"""Convert file obj with one pathname per line into rpiter
Closes fp when done. Given files are added to baserp.
"""
while 1:
line = fp.readline()
if not line: break
if line[-1] == "\n": line = line[:-1] # strip trailing newline
if not line: continue # skip blank lines
elif line[0] == "/": raise FilelistError(
"Read in absolute file name %s." % line)
yield baserp.append(line)
assert not fp.close(), "Error closing filelist fp"
def Mirror(src_rpath, dest_rpath, rpiter):
"""Copy files in fileiter from src_rpath to dest_rpath"""
sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
diffiter = Filelist.get_diffs(src_rpath, sigiter)
dest_rpath.conn.Filelist.patch(dest_rpath, diffiter)
dest_rpath.setdata()
def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath):
"""Mirror + put increment in tree based at inc_rpath"""
sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
diffiter = Filelist.get_diffs(src_rpath, sigiter)
dest_rpath.conn.Filelist.patch_and_increment(dest_rpath, diffiter,
inc_rpath)
dest_rpath.setdata()
def get_sigs(dest_rpbase, rpiter):
"""Get signatures of file analogs in rpiter
This is meant to be run on the destination side. Only the
extention part of the rps in rpiter will be used; the base is
ignored.
"""
def dest_iter(src_iter):
for src_rp in src_iter: yield dest_rpbase.new_index(src_rp.index)
return RORPIter.Signatures(dest_iter())
def get_diffs(src_rpbase, sigiter):
"""Get diffs based on sigiter and files in src_rpbase
This should be run on the local side.
"""
for sig_rorp in sigiter:
new_rp = src_rpbase.new_index(sig_rorp.index)
yield RORPIter.diffonce(sig_rorp, new_rp)
def patch(dest_rpbase, diffiter):
"""Process diffs in diffiter and update files in dest_rbpase.
Run remotely.
"""
for diff_rorp in diffiter:
basisrp = dest_rpbase.new_index(diff_rorp.index)
if basisrp.lstat(): Filelist.make_subdirs(basisrp)
Log("Processing %s" % basisrp.path, 7)
RORPIter.patchonce(dest_rpbase, basisrp, diff_rorp)
def patch_and_increment(dest_rpbase, diffiter, inc_rpbase):
"""Apply diffs in diffiter to dest_rpbase, and increment to inc_rpbase
Also to be run remotely.
"""
for diff_rorp in diffiter:
basisrp = dest_rpbase.new_index(diff_rorp.index)
if diff_rorp.lstat(): Filelist.make_subdirs(basisrp)
Log("Processing %s" % basisrp.path, 7)
# XXX This isn't done yet...
def make_subdirs(rpath):
"""Make sure that all the directories under the rpath exist
This function doesn't try to get the permissions right on the
underlying directories, just do the minimum to make sure the
file can be created.
"""
dirname = rpath.dirsplit()[0]
if dirname == '.' or dirname == '': return
dir_rp = RPath(rpath.conn, dirname)
Filelist.make_subdirs(dir_rp)
if not dir_rp.lstat(): dir_rp.mkdir()
MakeStatic(Filelist)
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.8.0 released June 14, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
# Distributions of rdiff-backup usually include a copy of the GPL in a
# file called COPYING. The GPL is also available online at
# http://www.gnu.org/copyleft/gpl.html.
#
# See http://www.stanford.edu/~bescoto/rdiff-backup for more
# information. Please send mail to me or the mailing list if you find
# bugs or have any suggestions.
from __future__ import nested_scopes, generators
import os, stat, time, sys, getopt, re, cPickle, types, shutil, sha, marshal, traceback, popen2, tempfile, gzip, UserList, errno, signal
This diff is collapsed.
This diff is collapsed.
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Convert an iterator to a file object and vice-versa"""
import cPickle, array
import Globals, C
class IterFileException(Exception): pass
class UnwrapFile:
"""Contains some basic methods for parsing a file containing an iter"""
def __init__(self, file):
self.file = file
def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
def _get(self):
"""Return pair (type, data) next in line on the file
type is a single character which is either "o" for object, "f"
for file, "c" for a continution of a file, or None if no more
data can be read. Data is either the file's data, if type is
"c" or "f", or the actual object if the type is "o".
"""
header = self.file.read(8)
if not header: return None, None
if len(header) != 8:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o": return type, cPickle.loads(buf)
else: return type, buf
class IterWrappingFile(UnwrapFile):
"""An iterator generated from a file.
Initialize with a file type object, and then it will return the
elements of the file in order.
"""
def __init__(self, file):
UnwrapFile.__init__(self, file)
self.currently_in_file = None
def __iter__(self): return self
def next(self):
if self.currently_in_file:
self.currently_in_file.close() # no error checking by this point
type, data = self._get()
if not type: raise StopIteration
if type == "o": return data
elif type == "f":
file = IterVirtualFile(self, data)
if data: self.currently_in_file = file
else: self.currently_in_file = None
return file
else: raise IterFileException("Bad file type %s" % type)
class IterVirtualFile(UnwrapFile):
"""Another version of a pretend file
This is returned by IterWrappingFile when a file is embedded in
the main file that the IterWrappingFile is based around.
"""
def __init__(self, iwf, initial_data):
"""Initializer
initial_data is the data from the first block of the file.
iwf is the iter wrapping file that spawned this
IterVirtualFile.
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
self.buffer = initial_data
self.closed = None
def read(self, length = -1):
"""Read length bytes from the file, updating buffers as necessary"""
assert not self.closed
if self.iwf.currently_in_file:
if length >= 0:
while length >= len(self.buffer):
if not self.addtobuffer(): break
real_len = min(length, len(self.buffer))
else:
while 1:
if not self.addtobuffer(): break
real_len = len(self.buffer)
else: real_len = min(length, len(self.buffer))
return_val = self.buffer[:real_len]
self.buffer = self.buffer[real_len:]
return return_val
def addtobuffer(self):
"""Read a chunk from the file and add it to the buffer"""
assert self.iwf.currently_in_file
type, data = self._get()
assert type == "c", "Type is %s instead of c" % type
if data:
self.buffer += data
return 1
else:
self.iwf.currently_in_file = None
return None
def close(self):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
self.buffer = ""
self.closed = 1
class FileWrappingIter:
"""A file interface wrapping around an iterator
This is initialized with an iterator, and then converts it into a
stream of characters. The object will evaluate as little of the
iterator as is necessary to provide the requested bytes.
The actual file is a sequence of marshaled objects, each preceded
by 8 bytes which identifies the following the type of object, and
specifies its length. File objects are not marshalled, but the
data is written in chunks of Globals.blocksize, and the following
blocks can identify themselves as continuations.
"""
def __init__(self, iter):
"""Initialize with iter"""
self.iter = iter
self.array_buf = array.array('c')
self.currently_in_file = None
self.closed = None
def read(self, length):
"""Return next length bytes in file"""
assert not self.closed
while len(self.array_buf) < length:
if not self.addtobuffer(): break
result = self.array_buf[:length].tostring()
del self.array_buf[:length]
return result
def addtobuffer(self):
"""Updates self.buffer, adding a chunk from the iterator.
Returns None if we have reached the end of the iterator,
otherwise return true.
"""
array_buf = self.array_buf
if self.currently_in_file:
array_buf.fromstring("c")
array_buf.fromstring(self.addfromfile())
else:
try: currentobj = self.iter.next()
except StopIteration: return None
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
array_buf.fromstring("f")
array_buf.fromstring(self.addfromfile())
else:
pickle = cPickle.dumps(currentobj, 1)
array_buf.fromstring("o")
array_buf.fromstring(C.long2str(long(len(pickle))))
array_buf.fromstring(pickle)
return 1
def addfromfile(self):
"""Read a chunk from the current file and return it"""
# Check file read for errors, buf = "" if find one
buf = Robust.check_common_error(self.read_error_handler,
self.currently_in_file.read,
[Globals.blocksize])
if not buf:
assert not self.currently_in_file.close()
self.currently_in_file = None
return C.long2str(long(len(buf))) + buf
def read_error_handler(self, exc, blocksize):
"""Log error when reading from file"""
Log("Error '%s' reading from fileobj, truncating" % (str(exc),), 2)
return ""
def _l2s_old(self, l):
"""Convert long int to string of 7 characters"""
s = ""
for i in range(7):
l, remainder = divmod(l, 256)
s = chr(remainder) + s
assert remainder == 0
return s
def close(self): self.closed = 1
class BufferedRead:
"""Buffer the .read() calls to the given file
This is used to lessen overhead and latency when a file is sent
over a connection. Profiling said that arrays were faster than
strings here.
"""
def __init__(self, file):
self.file = file
self.array_buf = array.array('c')
self.bufsize = Globals.conn_bufsize
def read(self, l = -1):
array_buf = self.array_buf
if l < 0: # Read as much as possible
result = array_buf.tostring() + self.file.read()
del array_buf[:]
return result
if len(array_buf) < l: # Try to make buffer at least as long as l
array_buf.fromstring(self.file.read(max(self.bufsize, l)))
result = array_buf[:l].tostring()
del array_buf[:l]
return result
def close(self): return self.file.close()
from log import *
from robust import *
This diff is collapsed.
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Provides a high-level interface to some librsync functions
This is a python wrapper around the lower-level _librsync module,
which is written in C. The goal was to use C as little as possible...
"""
import _librsync, types, array
blocksize = _librsync.RS_JOB_BLOCKSIZE
class librsyncError(Exception):
"""Signifies error in internal librsync processing (bad signature, etc.)
underlying _librsync.librsyncError's are regenerated using this
class because the C-created exceptions are by default
unPickleable. There is probably a way to fix this in _librsync,
but this scheme was easier.
"""
pass
class LikeFile:
"""File-like object used by SigFile, DeltaFile, and PatchFile"""
mode = "rb"
# This will be replaced in subclasses by an object with
# appropriate cycle() method
maker = None
def __init__(self, infile, need_seek = None):
"""LikeFile initializer - zero buffers, set eofs off"""
self.check_file(infile, need_seek)
self.infile = infile
self.closed = self.infile_closed = None
self.inbuf = ""
self.outbuf = array.array('c')
self.eof = self.infile_eof = None
def check_file(self, file, need_seek = None):
"""Raise type error if file doesn't have necessary attributes"""
if not hasattr(file, "read"):
raise TypeError("Basis file must have a read() method")
if not hasattr(file, "close"):
raise TypeError("Basis file must have a close() method")
if need_seek and not hasattr(file, "seek"):
raise TypeError("Basis file must have a seek() method")
def read(self, length = -1):
"""Build up self.outbuf, return first length bytes"""
if length == -1:
while not self.eof: self._add_to_outbuf_once()
real_len = len(self.outbuf)
else:
while not self.eof and len(self.outbuf) < length:
self._add_to_outbuf_once()
real_len = min(length, len(self.outbuf))
return_val = self.outbuf[:real_len].tostring()
del self.outbuf[:real_len]
return return_val
def _add_to_outbuf_once(self):
"""Add one cycle's worth of output to self.outbuf"""
if not self.infile_eof: self._add_to_inbuf()
try: self.eof, len_inbuf_read, cycle_out = self.maker.cycle(self.inbuf)
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.inbuf = self.inbuf[len_inbuf_read:]
self.outbuf.fromstring(cycle_out)
def _add_to_inbuf(self):
"""Make sure len(self.inbuf) >= blocksize"""
assert not self.infile_eof
while len(self.inbuf) < blocksize:
new_in = self.infile.read(blocksize)
if not new_in:
self.infile_eof = 1
assert not self.infile.close()
self.infile_closed = 1
break
self.inbuf += new_in
def close(self):
"""Close infile"""
if not self.infile_closed: assert not self.infile.close()
self.closed = 1
class SigFile(LikeFile):
"""File-like object which incrementally generates a librsync signature"""
def __init__(self, infile):
"""SigFile initializer - takes basis file
basis file only needs to have read() and close() methods. It
will be closed when we come to the end of the signature.
"""
LikeFile.__init__(self, infile)
try: self.maker = _librsync.new_sigmaker()
except _librsync.librsyncError, e: raise librsyncError(str(e))
class DeltaFile(LikeFile):
"""File-like object which incrementally generates a librsync delta"""
def __init__(self, signature, new_file):
"""DeltaFile initializer - call with signature and new file
Signature can either be a string or a file with read() and
close() methods. New_file also only needs to have read() and
close() methods. It will be closed when self is closed.
"""
LikeFile.__init__(self, new_file)
if type(signature) is types.StringType: sig_string = signature
else:
self.check_file(signature)
sig_string = signature.read()
assert not signature.close()
try: self.maker = _librsync.new_deltamaker(sig_string)
except _librsync.librsyncError, e: raise librsyncError(str(e))
class PatchedFile(LikeFile):
"""File-like object which applies a librsync delta incrementally"""
def __init__(self, basis_file, delta_file):
"""PatchedFile initializer - call with basis delta
Here basis_file must be a true Python file, because we may
need to seek() around in it a lot, and this is done in C.
delta_file only needs read() and close() methods.
"""
LikeFile.__init__(self, delta_file)
if type(basis_file) is not types.FileType:
raise TypeError("basis_file must be a (true) file")
try: self.maker = _librsync.new_patchmaker(basis_file)
except _librsync.librsyncError, e: raise librsyncError(str(e))
class SigGenerator:
"""Calculate signature.
Input and output is same as SigFile, but the interface is like md5
module, not filelike object
"""
def __init__(self):
"""Return new signature instance"""
try: self.sig_maker = _librsync.new_sigmaker()
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.gotsig = None
self.buffer = ""
self.sig_string = ""
def update(self, buf):
"""Add buf to data that signature will be calculated over"""
if self.gotsig:
raise librsyncError("SigGenerator already provided signature")
self.buffer += buf
while len(self.buffer) >= blocksize:
if self.process_buffer():
raise librsyncError("Premature EOF received from sig_maker")
def process_buffer(self):
"""Run self.buffer through sig_maker, add to self.sig_string"""
try: eof, len_buf_read, cycle_out = self.sig_maker.cycle(self.buffer)
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.buffer = self.buffer[len_buf_read:]
self.sig_string += cycle_out
return eof
def getsig(self):
"""Return signature over given data"""
while not self.process_buffer(): pass # keep running until eof
return self.sig_string
#!/usr/bin/env python
"""Demonstrate a memory leak in pysync/librsync"""
import os, _librsync
from librsync import *
os.chdir("/tmp")
# Write 2 1 byte files
afile = open("a", "wb")
afile.write("a")
afile.close()
efile = open("e", "wb")
efile.write("e")
efile.close()
def copy(infileobj, outpath):
outfile = open(outpath, "wb")
while 1:
buf = infileobj.read(32768)
if not buf: break
outfile.write(buf)
assert not outfile.close()
assert not infileobj.close()
def test_cycle():
for i in xrange(100000):
sm = _librsync.new_sigmaker()
sm.cycle("a")
def main_test():
for i in xrange(100000):
# Write signature file
afile = open("a", "rb")
copy(SigFile(afile), "sig")
# Write delta file
efile = open("e", "r")
sigfile = open("sig", "rb")
copy(DeltaFile(sigfile, efile), "delta")
# Write patched file
afile = open("e", "rb")
deltafile = open("delta", "rb")
copy(PatchedFile(afile, deltafile), "a.out")
main_test()
This diff is collapsed.
This diff is collapsed.
#include <stdio.h>
#include <rsync.h>
main()
{
FILE *basis_file, *sig_file;
char filename[50];
rs_stats_t stats;
rs_result result;
long i;
for(i=0; i<=100000; i++) {
basis_file = fopen("a", "r");
sig_file = fopen("sig", "w");
result = rs_sig_file(basis_file, sig_file,
RS_DEFAULT_BLOCK_LEN, RS_DEFAULT_STRONG_LEN,
&stats);
if (result != RS_DONE) exit(result);
fclose(basis_file);
fclose(sig_file);
}
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment