Commit 9d969392 authored by Tim Peters

Merge rev 37659 from 2.8 branch.

Worm around suspected Windows socket bug in Windows trigger code.

See the thread starting at
 http://mail.zope.org/pipermail/zope/2005-July/160433.html
for gory details.

Note that Zope trunk and 2.8 also have a third copy of this
code, in

    lib/python/zope/server/trigger.py

That's "a Zope3 problem".
parent 471576e3
...@@ -53,6 +53,15 @@ Zope Changes ...@@ -53,6 +53,15 @@ Zope Changes
Bugs fixed Bugs fixed
- As developed in a long thread starting at
http://mail.zope.org/pipermail/zope/2005-July/160433.html
there appears to be a race bug in the Microsoft Windows socket
implementation, rarely visible in ZEO and/or in
ZServer/medusa/thread/select_trigger.py when multiple processes try
to create an "asyncore trigger" simultaneously, most often (in
stress tests) manifesting as a hung process. The Windows-specific
trigger code in both was changed to work around this bug when it occurs.
- Collector #1807: fixed memory leak in cAccessControl.guarded_getattr() - Collector #1807: fixed memory leak in cAccessControl.guarded_getattr()
- Collector #1852: fixed wrong URL construction in webdav.davcmds - Collector #1852: fixed wrong URL construction in webdav.davcmds
......
...@@ -9,6 +9,7 @@ import os ...@@ -9,6 +9,7 @@ import os
import socket import socket
import string import string
import thread import thread
import errno
if os.name == 'posix': if os.name == 'posix':
...@@ -95,59 +96,82 @@ else: ...@@ -95,59 +96,82 @@ else:
class BindError(Exception): class BindError(Exception):
pass pass
class trigger (asyncore.dispatcher): class trigger(asyncore.dispatcher):
address = ('127.9.9.9', 19999)
def __init__ (self): def __init__ (self):
a = socket.socket (socket.AF_INET, socket.SOCK_STREAM) # The __init__ code is taken from ZODB 3.4.1's
w = socket.socket (socket.AF_INET, socket.SOCK_STREAM) # ZEO/zrpc/trigger.py, to worm around problems in the original
# Windows __init__ code.
# set TCP_NODELAY to true to avoid buffering
w.setsockopt(socket.IPPROTO_TCP, 1, 1) # Get a pair of connected sockets. The trigger is the 'w'
# end of the pair, which is connected to 'r'. 'r' is put
# tricky: get a pair of connected sockets # in the asyncore socket map. "pulling the trigger" then
host='127.0.0.1' # means writing something on w, which will wake up r.
port=19999
w = socket.socket()
# Disable buffering -- pulling the trigger sends 1 byte,
# and we want that sent immediately, to wake up asyncore's
# select() ASAP.
w.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
count = 0
while 1: while 1:
count += 1
# Bind to a local port; for efficiency, let the OS pick
# a free port for us.
# Unfortunately, stress tests showed that we may not
# be able to connect to that port ("Address already in
# use") even though the OS picked it. This appears
# to be a race bug in the Windows socket implementation.
# So we loop until a connect() succeeds (almost always
# on the first try). See the long thread at
# http://mail.zope.org/pipermail/zope/2005-July/160433.html
# for hideous details.
a = socket.socket()
a.bind(("127.0.0.1", 0))
connect_address = a.getsockname() # assigned (host, port) pair
a.listen(1)
try: try:
self.address=(host, port) w.connect(connect_address)
a.bind(self.address) break # success
break except socket.error, detail:
except: if detail[0] != errno.WSAEADDRINUSE:
if port <= 19950: # "Address already in use" is the only error
raise BindError, 'Cannot bind trigger!' # I've seen on two WinXP Pro SP2 boxes, under
port=port - 1 # Pythons 2.3.5 and 2.4.1.
raise
# (10048, 'Address already in use')
# assert count <= 2 # never triggered in Tim's tests
if count >= 10: # I've never seen it go above 2
a.close()
w.close()
raise BindError("Cannot bind trigger!")
# Close `a` and try again. Note: I originally put a short
# sleep() here, but it didn't appear to help or hurt.
a.close()
a.listen (1) r, addr = a.accept() # r becomes asyncore's (self.)socket
w.setblocking (0)
try:
w.connect (self.address)
except:
pass
r, addr = a.accept()
a.close() a.close()
w.setblocking (1)
self.trigger = w self.trigger = w
asyncore.dispatcher.__init__ (self, r) asyncore.dispatcher.__init__ (self, r)
self.lock = thread.allocate_lock() self.lock = thread.allocate_lock()
self.thunks = [] self.thunks = []
self._trigger_connected = 0 self._trigger_connected = 0
def __repr__ (self): def __repr__(self):
return '<select-trigger (loopback) at %x>' % id(self) return '<select-trigger (loopback) at %x>' % id(self)
def readable (self): def readable(self):
return 1 return 1
def writable (self): def writable(self):
return 0 return 0
def handle_connect (self): def handle_connect(self):
pass pass
def pull_trigger (self, thunk=None): def pull_trigger(self, thunk=None):
if thunk: if thunk:
try: try:
self.lock.acquire() self.lock.acquire()
...@@ -156,8 +180,8 @@ else: ...@@ -156,8 +180,8 @@ else:
self.lock.release() self.lock.release()
self.trigger.send ('x') self.trigger.send ('x')
def handle_read (self): def handle_read(self):
self.recv (8192) self.recv(8192)
try: try:
self.lock.acquire() self.lock.acquire()
for thunk in self.thunks: for thunk in self.thunks:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment