nexedi / neoppod · Commits

Commit a4c06242, authored Jan 10, 2017 by Julien Muchembled
parent dc7a129c

Review aborting of transactions

Showing 12 changed files with 182 additions and 33 deletions (+182, -33)
neo/client/app.py               +10  -13
neo/lib/protocol.py              +2   -1
neo/master/app.py                +1   -1
neo/master/handlers/client.py   +24   -3
neo/master/transactions.py       +3   -1
neo/storage/handlers/client.py   +0   -4
neo/storage/handlers/master.py   +4   -0
neo/storage/replicator.py       +11   -4
neo/storage/transactions.py      +2   -3
neo/tests/__init__.py           +25   -0
neo/tests/threaded/__init__.py   +7   -2
neo/tests/threaded/test.py      +93   -1
neo/client/app.py

@@ -595,19 +595,16 @@ class Application(ThreadedApplication):
         txn_context = self._txn_container.pop(transaction)
         if txn_context is None:
             return
-        p = Packets.AbortTransaction(txn_context['ttid'])
-        # cancel transaction on all those nodes
-        conns = [self.master_conn]
-        for uuid in txn_context['involved_nodes']:
-            node = self.nm.getByUUID(uuid)
-            if node is not None:
-                conns.append(self.cp.getConnForNode(node))
-        for conn in conns:
-            if conn is not None:
-                try:
-                    conn.notify(p)
-                except ConnectionClosed:
-                    pass
+        try:
+            notify = self.master_conn.notify
+        except AttributeError:
+            pass
+        else:
+            try:
+                notify(Packets.AbortTransaction(txn_context['ttid'],
+                    txn_context['involved_nodes']))
+            except ConnectionClosed:
+                pass
         # We don't need to flush queue, as it won't be reused by future
         # transactions (deleted on next line & indexed by transaction object
         # instance).
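The gist of this hunk, as a self-contained sketch (plain functions, not NEO's Application API; 'master', 'S1' and 'S2' are placeholder names): before, tpc_abort notified the master and every reachable involved storage itself; now it sends a single AbortTransaction carrying the involved-node list to the primary master only, which fans the abort out (see neo/master/handlers/client.py below).

def abort_messages_before(involved_nodes):
    # one packet per reachable destination, sent by the client itself
    return [('master', ('ttid',))] + [(n, ('ttid',)) for n in involved_nodes]

def abort_messages_after(involved_nodes):
    # a single packet to the master; the uuid list travels inside it
    return [('master', ('ttid', tuple(involved_nodes)))]

assert abort_messages_before(['S1', 'S2']) == \
    [('master', ('ttid',)), ('S1', ('ttid',)), ('S2', ('ttid',))]
assert abort_messages_after(['S1', 'S2']) == [('master', ('ttid', ('S1', 'S2')))]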
neo/lib/protocol.py

@@ -976,10 +976,11 @@ class StoreObject(Packet):
 class AbortTransaction(Packet):
     """
-    Abort a transaction. C -> S, PM.
+    Abort a transaction. C -> PM -> S.
     """
     _fmt = PStruct('abort_transaction',
         PTID('tid'),
+        PFUUIDList,  # unused for PM -> S
     )

 class StoreTransaction(Packet):
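To make the two legs concrete, here is an illustration with a plain namedtuple instead of NEO's PStruct machinery (an assumption for readability, not the real wire encoding): the uuid list is only meaningful on the client-to-master leg.

from collections import namedtuple

# stand-in for the packet defined above; field names follow the PStruct
AbortTransaction = namedtuple('AbortTransaction', 'tid uuid_list')

client_to_master = AbortTransaction(tid='<ttid>', uuid_list=('S1', 'S2'))
master_to_storage = AbortTransaction(tid='<ttid>', uuid_list=())  # "unused for PM -> S"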
neo/master/app.py

@@ -585,6 +585,6 @@ class Application(BaseApplication):
     def isStorageReady(self, uuid):
         return uuid in self.storage_ready_dict

-    def getStorageReadySet(self, readiness):
+    def getStorageReadySet(self, readiness=float('inf')):
         return {k for k, v in self.storage_ready_dict.iteritems()
                 if v <= readiness}
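A quick illustration of the new default (plain dict and items() here; the master uses storage_ready_dict and Python 2's iteritems(), and the readiness values are made up): with readiness=float('inf'), every registered storage is returned, which is what the abort path below relies on when it calls getStorageReadySet() with no argument.

storage_ready_dict = {'S1': 1, 'S2': 2, 'S3': 3}   # uuid -> readiness counter (example values)
assert {k for k, v in storage_ready_dict.items() if v <= 2} == {'S1', 'S2'}
assert {k for k, v in storage_ready_dict.items() if v <= float('inf')} == {'S1', 'S2', 'S3'}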
neo/master/handlers/client.py

@@ -74,6 +74,12 @@ class ClientServiceHandler(MasterHandler):
             node.ask(p, timeout=60)
         else:
             conn.answer(Errors.IncompleteTransaction())
+            # It's simpler to abort automatically rather than asking the client
+            # to send a notification on tpc_abort, since that would have kept
+            # the transaction longer in the list of transactions.
+            # This should happen so rarely that we don't try to minimize the
+            # number of abort notifications by looking at the modified partitions.
+            self.abortTransaction(conn, ttid, app.getStorageReadySet())

     def askFinalTID(self, conn, ttid):
         tm = self.app.tm
@@ -102,9 +108,24 @@ class ClientServiceHandler(MasterHandler):
         else:
             conn.answer(Packets.AnswerPack(False))

-    def abortTransaction(self, conn, tid):
-        # BUG: The replicator may wait for this transaction to be finished.
-        self.app.tm.abort(tid, conn.getUUID())
+    def abortTransaction(self, conn, tid, uuid_list):
+        # Consider a failure when the connection between the storage and the
+        # client breaks while the answer to the first write is sent back.
+        # In other words, the client cannot know the exact set of nodes that
+        # know this transaction, so it sends us all nodes it considered for
+        # writing.
+        # We must also add those that are waiting for this transaction to be
+        # finished (returned by tm.abort), because they may have joined the
+        # cluster after the client started to abort.
+        app = self.app
+        involved = app.tm.abort(tid, conn.getUUID())
+        involved.update(uuid_list)
+        involved.intersection_update(app.getStorageReadySet())
+        if involved:
+            p = Packets.AbortTransaction(tid, ())
+            getByUUID = app.nm.getByUUID
+            for involved in involved:
+                getByUUID(involved).notify(p)

 # like ClientServiceHandler but read-only & only for tid <= backup_tid
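The set arithmetic of the new abortTransaction(), as a self-contained model (not the handler itself; the uuids are placeholder strings): union the nodes the client considered for writing with the nodes tm.abort() reports as waiting for the transaction, then keep only the storages that are currently ready.

def storages_to_notify(uuid_list, waiting_uuids, ready_uuids):
    involved = set(waiting_uuids)        # returned by tm.abort() in the hunk above
    involved.update(uuid_list)           # everything the client considered for writing
    involved.intersection_update(ready_uuids)
    return involved

# Client wrote to S1/S2, S3 joined later and waits for the transaction,
# S2 is not ready: only S1 and S3 receive AbortTransaction(tid, ()).
assert storages_to_notify({'S1', 'S2'}, {'S3'}, {'S1', 'S3'}) == {'S1', 'S3'}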
neo/master/transactions.py

@@ -393,10 +393,12 @@ class TransactionManager(EventQueue):
         Abort a transaction
         """
         logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
-        if self[ttid].isPrepared():
+        txn = self[ttid]
+        if txn.isPrepared():
             raise ProtocolError("commit already requested for ttid %s"
                                 % dump(ttid))
         del self[ttid]
+        return txn._notification_set

     def lock(self, ttid, uuid):
         """
neo/storage/handlers/client.py

@@ -57,9 +57,6 @@ class ClientOperationHandler(EventHandler):
                 compression, checksum, data, data_serial)
         conn.answer(p)

-    def abortTransaction(self, conn, ttid):
-        self.app.tm.abort(ttid)
-
     def askStoreTransaction(self, conn, ttid, *txn_info):
         self.app.tm.register(conn, ttid)
         self.app.tm.vote(ttid, txn_info)
@@ -201,7 +198,6 @@ class ClientReadOnlyOperationHandler(ClientOperationHandler):
             conn.answer(Errors.ReadOnlyAccess(
                 'read-only access because cluster is in backuping mode'))

-    abortTransaction = _readOnly
     askStoreTransaction = _readOnly
     askVoteTransaction = _readOnly
     askStoreObject = _readOnly
neo/storage/handlers/master.py

@@ -57,6 +57,10 @@ class MasterOperationHandler(BaseMasterHandler):
     def notifyUnlockInformation(self, conn, ttid):
         self.app.tm.unlock(ttid)

+    def abortTransaction(self, conn, ttid, _):
+        self.app.tm.abort(ttid)
+        self.app.replicator.transactionFinished(ttid)
+
     def askPack(self, conn, tid):
         app = self.app
         logging.info('Pack started, up to %s...', dump(tid))
neo/storage/replicator.py

@@ -135,14 +135,19 @@ class Replicator(object):
             self.replicate_dict[offset] = max_tid
         self._nextPartition()

-    def transactionFinished(self, ttid, max_tid):
+    def transactionFinished(self, ttid, max_tid=None):
         """ Callback from MasterOperationHandler """
-        self.ttid_set.remove(ttid)
+        try:
+            self.ttid_set.remove(ttid)
+        except KeyError:
+            assert max_tid is None, max_tid
+            return
         min_ttid = min(self.ttid_set) if self.ttid_set else INVALID_TID
         for offset, p in self.partition_dict.iteritems():
             if p.max_ttid and p.max_ttid < min_ttid:
                 p.max_ttid = None
-                self.replicate_dict[offset] = max_tid
+                if max_tid:
+                    self.replicate_dict[offset] = max_tid
         self._nextPartition()

     def getBackupTID(self):
@@ -355,7 +360,9 @@ class Replicator(object):
         p = self.partition_dict[offset]
         p.next_obj = add64(tid, 1)
         self.updateBackupTID()
-        if not p.max_ttid:
+        if p.max_ttid:
+            logging.debug("unfinished transactions: %r", self.ttid_set)
+        else:
             self.app.tm.replicated(offset, tid)
             logging.debug("partition %u replicated up to %s from %r",
                           offset, dump(tid), self.current_node)
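A minimal model of the guard added to transactionFinished() (a plain set instead of the Replicator state): when the master's abort notification reaches a storage for a transaction the replicator never tracked, the callback must return quietly, and on that path max_tid is necessarily None.

def transaction_finished(ttid_set, ttid, max_tid=None):
    try:
        ttid_set.remove(ttid)
    except KeyError:
        assert max_tid is None, max_tid
        return False        # unknown ttid: nothing else to update
    return True             # known ttid: pending replications may resume

tracked = {10, 11}
assert transaction_finished(tracked, 10, max_tid=12)   # normal unlock notification
assert not transaction_finished(tracked, 99)           # abort of an untracked ttid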
neo/storage/transactions.py

@@ -324,9 +324,8 @@ class TransactionManager(EventQueue):
         Note: does not alter persistent content.
         """
         if ttid not in self._transaction_dict:
-            # the tid may be unknown as the transaction is aborted on every node
-            # of the partition, even if no data was received (eg. conflict on
-            # another node)
+            assert not even_if_locked
+            # See how the master processes AbortTransaction from the client.
             return
         logging.debug('Abort TXN %s', dump(ttid))
         transaction = self._transaction_dict[ttid]
neo/tests/__init__.py

@@ -37,6 +37,7 @@ from time import time
 from struct import pack, unpack
 from unittest.case import _ExpectedFailure, _UnexpectedSuccess
 try:
+    from transaction.interfaces import IDataManager
     from ZODB.utils import newTid
 except ImportError:
     pass
@@ -378,6 +379,30 @@ class NeoUnitTestBase(NeoTestBase):
         return packet


+class TransactionalResource(object):
+
+    class _sortKey(object):
+
+        def __init__(self, last):
+            self._last = last
+
+        def __cmp__(self, other):
+            assert type(self) is not type(other), other
+            return 1 if self._last else -1
+
+    def __init__(self, txn, last, **kw):
+        self.sortKey = lambda: self._sortKey(last)
+        for k in kw:
+            assert callable(IDataManager.get(k)), k
+        self.__dict__.update(kw)
+        txn.get().join(self)
+
+    def __getattr__(self, attr):
+        if callable(IDataManager.get(attr)):
+            return lambda *_: None
+        return self.__getattribute__(attr)
+
+
 class Patch(object):
     """
     Patch attributes and revert later automatically.
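A usage sketch of the new helper, assuming the optional transaction package is importable (the test below uses it as TransactionalResource(t1, 1, tpc_vote=...)): it joins a dummy data manager to a transaction so that one chosen two-phase-commit hook runs a callback, while every other IDataManager method becomes a no-op.

import transaction

voted = []
# sort last (second argument), and record the transaction when the vote phase runs
TransactionalResource(transaction, 1, tpc_vote=voted.append)
transaction.commit()        # the hook fires between tpc_begin and tpc_finish
assert len(voted) == 1      # the vote callback ran exactly once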
neo/tests/threaded/__init__.py

@@ -956,13 +956,12 @@ class NEOThreadedTest(NeoTestBase):
                 return obj
             return unpickler

-    class newThread(threading.Thread):
+    class newPausedThread(threading.Thread):

         def __init__(self, func, *args, **kw):
             threading.Thread.__init__(self)
             self.__target = func, args, kw
             self.daemon = True
-            self.start()

         def run(self):
             try:
@@ -978,6 +977,12 @@ class NEOThreadedTest(NeoTestBase):
                 del self.__exc_info
                 raise etype, value, tb

+    class newThread(newPausedThread):
+
+        def __init__(self, *args, **kw):
+            NEOThreadedTest.newPausedThread.__init__(self, *args, **kw)
+            self.start()
+
     def commitWithStorageFailure(self, client, txn):
         with Patch(client, _getFinalTID=lambda *_: None):
             self.assertRaises(ConnectionClosed, txn.commit)
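The split exists so a test can create the worker first, arm its patches and packet filters, and only then let the background commit run. A standalone sketch with plain threading, outside the NEOThreadedTest class (the payload is a placeholder):

import threading

class PausedThread(threading.Thread):
    """Like newPausedThread above: constructed paused, started explicitly."""
    def __init__(self, func, *args, **kw):
        threading.Thread.__init__(self)
        self._target_call = func, args, kw
        self.daemon = True              # note: no self.start() here any more
    def run(self):
        func, args, kw = self._target_call
        func(*args, **kw)

done = []
t = PausedThread(done.append, 'commit finished')
# ... a test would install Patch / ConnectionFilter objects here ...
t.start()
t.join()
assert done == ['commit finished']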
neo/tests/threaded/test.py

@@ -32,7 +32,7 @@ from neo.lib.connection import ServerConnection, MTClientConnection
 from neo.lib.exception import DatabaseFailure, StoppedOperation
 from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
     ZERO_OID, ZERO_TID
-from .. import expectedFailure, Patch
+from .. import expectedFailure, Patch, TransactionalResource
 from . import ConnectionFilter, LockLock, NEOThreadedTest, with_cluster
 from neo.lib.util import add64, makeChecksum, p64, u64
 from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
@@ -1463,5 +1463,97 @@ class Test(NEOThreadedTest):
             value_list.append(r[x].value)
         self.assertEqual(value_list, range(3))

+    @with_cluster(replicas=1, partitions=3, storage_count=3)
+    def testMasterArbitratingVote(self, cluster):
+        """
+        p\S 1 2 3
+         0  U U .
+         1  . U U
+         2  U . U
+
+        With the above setup, check when a client C1 fails to connect to S2
+        and another C2 fails to connect to S1.
+
+        For the first 2 scenarios:
+        - C1 first votes (the master accepts)
+        - C2 vote is delayed until C1 decides to finish or abort
+        """
+        def delayAbort(conn, packet):
+            return isinstance(packet, Packets.AbortTransaction)
+        def noConnection(jar, storage):
+            return Patch(jar.db().storage.app.cp,
+                getConnForNode=lambda orig, node:
+                    None if node.getUUID() == storage.uuid else orig(node))
+        def c1_vote(txn):
+            def vote(orig, *args):
+                result = orig(*args)
+                ll()
+                return result
+            with LockLock() as ll, Patch(cluster.master.tm, vote=vote):
+                commit2.start()
+                ll()
+            if c1_aborts:
+                raise Exception
+        pt = [{x.getUUID() for x in x}
+              for x in cluster.master.pt.partition_list]
+        cluster.storage_list.sort(key=lambda x:
+            (x.uuid not in pt[0], x.uuid in pt[1]))
+        pt = 'UU.|.UU|U.U'
+        self.assertPartitionTable(cluster, pt)
+        s1, s2, s3 = cluster.storage_list
+        t1, c1 = cluster.getTransaction()
+        with cluster.newClient(1) as db:
+            t2, c2 = cluster.getTransaction(db)
+            with noConnection(c1, s2), noConnection(c2, s1):
+                cluster.client.cp.connection_dict[s2.uuid].close()
+                self.tic()
+                for c1_aborts in 0, 1:
+                    # 0: C1 finishes, C2 vote fails
+                    # 1: C1 aborts, C2 finishes
+                    #
+                    # Although we try to modify the same oid, there's no
+                    # conflict because each storage node sees a single
+                    # and different transaction: vote to storages is done
+                    # in parallel, and the master must be involved as an
+                    # arbitrator, which ultimately rejects 1 of the 2
+                    # transactions, preferably before the second phase of
+                    # the commit.
+                    t1.begin(); c1.root()._p_changed = 1
+                    t2.begin(); c2.root()._p_changed = 1
+                    commit2 = self.newPausedThread(t2.commit)
+                    TransactionalResource(t1, 1, tpc_vote=c1_vote)
+                    with ConnectionFilter() as f:
+                        if not c1_aborts:
+                            f.add(delayAbort)
+                        f.delayAskFetchTransactions(lambda _:
+                            f.discard(delayAbort))
+                        try:
+                            t1.commit()
+                            self.assertFalse(c1_aborts)
+                        except Exception:
+                            self.assertTrue(c1_aborts)
+                        try:
+                            commit2.join()
+                            self.assertTrue(c1_aborts)
+                        except NEOStorageError:
+                            self.assertFalse(c1_aborts)
+                        self.tic()
+                        self.assertPartitionTable(cluster,
+                            'OU.|.UU|O.U' if c1_aborts else 'UO.|.OU|U.U')
+                    self.tic()
+                    self.assertPartitionTable(cluster, pt)
+            # S3 fails while C1 starts to finish
+            with ConnectionFilter() as f:
+                f.add(lambda conn, packet:
+                    conn.getUUID() == s3.uuid and
+                    isinstance(packet, Packets.AcceptIdentification))
+                t1.begin(); c1.root()._p_changed = 1
+                TransactionalResource(t1, 0, tpc_finish=lambda *_:
+                    cluster.master.nm.getByUUID(s3.uuid)
+                    .getConnection().close())
+                self.assertRaises(NEOStorageError, t1.commit)
+                self.assertPartitionTable(cluster, 'UU.|.UO|U.O')
+            self.tic()
+            self.assertPartitionTable(cluster, pt)

 if __name__ == "__main__":
     unittest.main()
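Reading aid for the partition-table strings asserted above (an assumption about the test helper's notation, not something stated in this diff): one group per partition, one character per storage in storage_list order, with 'U' an up-to-date cell, 'O' an out-of-date cell and '.' no cell.

def parse_pt(pt, storages=('S1', 'S2', 'S3')):
    # e.g. 'UU.|.UU|U.U' -> cell marker per storage, per partition
    return [{s: c for s, c in zip(storages, row) if c != '.'}
            for row in pt.split('|')]

assert parse_pt('UU.|.UU|U.U') == [
    {'S1': 'U', 'S2': 'U'},     # partition 0 on S1, S2 (matches the docstring table)
    {'S2': 'U', 'S3': 'U'},     # partition 1 on S2, S3
    {'S1': 'U', 'S3': 'U'},     # partition 2 on S1, S3
]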
Julien Muchembled (@jm) mentioned this in commit 9b33b1db82e3ab4b8e668b17f9491dbe30a0a9c7 (Feb 27, 2017).