Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
1
Issues
1
List
Boards
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
neoppod
Commits
f2796d9c
Commit
f2796d9c
authored
Dec 27, 2016
by
Julien Muchembled
Browse files
Options
Browse Files
Download
Plain Diff
Replace --prune-orphan storage option with a command that can be used in RUNNING state
parents
fd007f5d
ccbf7bce
Pipeline
#4584
skipped
Changes
13
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
172 additions
and
35 deletions
+172
-35
neo/admin/handler.py
neo/admin/handler.py
+1
-0
neo/lib/config.py
neo/lib/config.py
+0
-4
neo/lib/event.py
neo/lib/event.py
+10
-7
neo/lib/protocol.py
neo/lib/protocol.py
+24
-1
neo/lib/threaded_app.py
neo/lib/threaded_app.py
+2
-2
neo/master/handlers/administration.py
neo/master/handlers/administration.py
+13
-0
neo/neoctl/app.py
neo/neoctl/app.py
+24
-9
neo/neoctl/neoctl.py
neo/neoctl/neoctl.py
+6
-0
neo/scripts/neostorage.py
neo/scripts/neostorage.py
+1
-5
neo/storage/database/manager.py
neo/storage/database/manager.py
+51
-6
neo/storage/handlers/__init__.py
neo/storage/handlers/__init__.py
+5
-0
neo/tests/threaded/__init__.py
neo/tests/threaded/__init__.py
+1
-1
neo/tests/threaded/test.py
neo/tests/threaded/test.py
+34
-0
No files found.
neo/admin/handler.py
View file @
f2796d9c
...
...
@@ -71,6 +71,7 @@ class AdminEventHandler(EventHandler):
setNodeState
=
forward_ask
(
Packets
.
SetNodeState
)
checkReplicas
=
forward_ask
(
Packets
.
CheckReplicas
)
truncate
=
forward_ask
(
Packets
.
Truncate
)
repair
=
forward_ask
(
Packets
.
Repair
)
class
MasterEventHandler
(
EventHandler
):
...
...
neo/lib/config.py
View file @
f2796d9c
...
...
@@ -130,10 +130,6 @@ class ConfigurationManager(object):
# only from command line
return
self
.
argument_list
.
get
(
'reset'
,
False
)
def
getPruneOrphan
(
self
):
# only from command line
return
self
.
argument_list
.
get
(
'prune_orphan'
,
False
)
def
getUUID
(
self
):
# only from command line
uuid
=
self
.
argument_list
.
get
(
'uuid'
,
None
)
...
...
neo/lib/event.py
View file @
f2796d9c
...
...
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
os
,
thread
import
os
from
time
import
time
from
select
import
epoll
,
EPOLLIN
,
EPOLLOUT
,
EPOLLERR
,
EPOLLHUP
from
errno
import
EAGAIN
,
EEXIST
,
EINTR
,
ENOENT
...
...
@@ -35,7 +35,6 @@ class EpollEventManager(object):
"""This class manages connections and events based on epoll(5)."""
_timeout
=
None
_trigger_exit
=
False
def
__init__
(
self
):
self
.
connection_dict
=
{}
...
...
@@ -43,6 +42,7 @@ class EpollEventManager(object):
self
.
writer_set
=
set
()
self
.
epoll
=
epoll
()
self
.
_pending_processing
=
[]
self
.
_trigger_list
=
[]
self
.
_trigger_fd
,
w
=
os
.
pipe
()
os
.
close
(
w
)
self
.
_trigger_lock
=
Lock
()
...
...
@@ -231,9 +231,12 @@ class EpollEventManager(object):
if
fd
==
self
.
_trigger_fd
:
with
self
.
_trigger_lock
:
self
.
epoll
.
unregister
(
fd
)
if
self
.
_trigger_exit
:
del
self
.
_trigger_exit
thread
.
exit
()
action_list
=
self
.
_trigger_list
try
:
while
action_list
:
action_list
.
pop
(
0
)()
finally
:
del
action_list
[:]
continue
if
conn
.
readable
():
self
.
_addPendingConnection
(
conn
)
...
...
@@ -253,9 +256,9 @@ class EpollEventManager(object):
def
setTimeout
(
self
,
*
args
):
self
.
_timeout
,
self
.
_on_timeout
=
args
def
wakeup
(
self
,
exit
=
False
):
def
wakeup
(
self
,
*
actions
):
with
self
.
_trigger_lock
:
self
.
_trigger_
exit
|=
exit
self
.
_trigger_
list
+=
actions
try
:
self
.
epoll
.
register
(
self
.
_trigger_fd
)
except
IOError
,
e
:
...
...
neo/lib/protocol.py
View file @
f2796d9c
...
...
@@ -20,7 +20,7 @@ import traceback
from
cStringIO
import
StringIO
from
struct
import
Struct
PROTOCOL_VERSION
=
8
PROTOCOL_VERSION
=
9
# Size restrictions.
MIN_PACKET_SIZE
=
10
...
...
@@ -1175,6 +1175,25 @@ class SetClusterState(Packet):
_answer
=
Error
class
Repair
(
Packet
):
"""
Ask storage nodes to repair their databases. ctl -> A -> M
"""
_flags
=
map
(
PBoolean
,
(
'dry_run'
,
# 'prune_orphan' (commented because it's the only option for the moment)
))
_fmt
=
PStruct
(
'repair'
,
PFUUIDList
,
*
_flags
)
_answer
=
Error
class
RepairOne
(
Packet
):
"""
See Repair. M -> S
"""
_fmt
=
PStruct
(
'repair'
,
*
Repair
.
_flags
)
class
ClusterInformation
(
Packet
):
"""
Notify information about the cluster
...
...
@@ -1684,6 +1703,10 @@ class Packets(dict):
TweakPartitionTable
,
ignore_when_closed
=
False
)
SetClusterState
=
register
(
SetClusterState
,
ignore_when_closed
=
False
)
Repair
=
register
(
Repair
)
NotifyRepair
=
register
(
RepairOne
)
NotifyClusterInformation
=
register
(
ClusterInformation
)
AskClusterState
,
AnswerClusterState
=
register
(
...
...
neo/lib/threaded_app.py
View file @
f2796d9c
...
...
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
threading
,
weakref
import
thread
,
thread
ing
,
weakref
from
.
import
logging
from
.app
import
BaseApplication
from
.connection
import
ConnectionClosed
...
...
@@ -69,7 +69,7 @@ class ThreadedApplication(BaseApplication):
conn
.
close
()
# Stop polling thread
logging
.
debug
(
'Stopping %s'
,
self
.
poll_thread
)
self
.
em
.
wakeup
(
True
)
self
.
em
.
wakeup
(
thread
.
exit
)
else
:
super
(
ThreadedApplication
,
self
).
close
()
...
...
neo/master/handlers/administration.py
View file @
f2796d9c
...
...
@@ -147,6 +147,19 @@ class AdministrationHandler(MasterHandler):
logging
.
warning
(
'No node added'
)
conn
.
answer
(
Errors
.
Ack
(
'No node added'
))
def
repair
(
self
,
conn
,
uuid_list
,
*
args
):
getByUUID
=
self
.
app
.
nm
.
getByUUID
node_list
=
[]
for
uuid
in
uuid_list
:
node
=
getByUUID
(
uuid
)
if
node
is
None
or
not
(
node
.
isStorage
()
and
node
.
isIdentified
()):
raise
ProtocolError
(
"invalid storage node %s"
%
uuid_str
(
uuid
))
node_list
.
append
(
node
)
repair
=
Packets
.
NotifyRepair
(
*
args
)
for
node
in
node_list
:
node
.
notify
(
repair
)
conn
.
answer
(
Errors
.
Ack
(
''
))
def
tweakPartitionTable
(
self
,
conn
,
uuid_list
):
app
=
self
.
app
state
=
app
.
getClusterState
()
...
...
neo/neoctl/app.py
View file @
f2796d9c
...
...
@@ -36,6 +36,7 @@ action_dict = {
'tweak'
:
'tweakPartitionTable'
,
'drop'
:
'dropNode'
,
'kill'
:
'killNode'
,
'prune_orphan'
:
'pruneOrphan'
,
'truncate'
:
'truncate'
,
}
...
...
@@ -146,20 +147,20 @@ class TerminalNeoCTL(object):
assert
len
(
params
)
==
0
return
self
.
neoctl
.
startCluster
()
def
_getStorageList
(
self
,
params
):
if
len
(
params
)
==
1
and
params
[
0
]
==
'all'
:
node_list
=
self
.
neoctl
.
getNodeList
(
NodeTypes
.
STORAGE
)
return
[
node
[
2
]
for
node
in
node_list
]
return
map
(
self
.
asNode
,
params
)
def
enableStorageList
(
self
,
params
):
"""
Enable cluster to make use of pending storages.
Parameters: all
node [node [...]]
node: if "all", add all pending storage nodes.
Parameters: node [node [...]]
node: if "all", add all pending storage nodes,
otherwise, the list of storage nodes to enable.
"""
if
len
(
params
)
==
1
and
params
[
0
]
==
'all'
:
node_list
=
self
.
neoctl
.
getNodeList
(
NodeTypes
.
STORAGE
)
uuid_list
=
[
node
[
2
]
for
node
in
node_list
]
else
:
uuid_list
=
map
(
self
.
asNode
,
params
)
return
self
.
neoctl
.
enableStorageList
(
uuid_list
)
return
self
.
neoctl
.
enableStorageList
(
self
.
_getStorageList
(
params
))
def
tweakPartitionTable
(
self
,
params
):
"""
...
...
@@ -189,6 +190,20 @@ class TerminalNeoCTL(object):
"""
return
uuid_str
(
self
.
neoctl
.
getPrimary
())
def
pruneOrphan
(
self
,
params
):
"""
Fix database by deleting unreferenced raw data
This can take a long time.
Parameters: dry_run node [node [...]]
dry_run: 0 or 1
node: if "all", ask all connected storage nodes to repair,
otherwise, only the given list of storage nodes.
"""
dry_run
=
"01"
.
index
(
params
.
pop
(
0
))
return
self
.
neoctl
.
repair
(
self
.
_getStorageList
(
params
),
dry_run
)
def
truncate
(
self
,
params
):
"""
Truncate the database at the given tid.
...
...
neo/neoctl/neoctl.py
View file @
f2796d9c
...
...
@@ -172,6 +172,12 @@ class NeoCTL(BaseApplication):
raise
RuntimeError
(
response
)
return
response
[
1
]
def
repair
(
self
,
*
args
):
response
=
self
.
__ask
(
Packets
.
Repair
(
*
args
))
if
response
[
0
]
!=
Packets
.
Error
or
response
[
1
]
!=
ErrorCodes
.
ACK
:
raise
RuntimeError
(
response
)
return
response
[
2
]
def
truncate
(
self
,
tid
):
response
=
self
.
__ask
(
Packets
.
Truncate
(
tid
))
if
response
[
0
]
!=
Packets
.
Error
or
response
[
1
]
!=
ErrorCodes
.
ACK
:
...
...
neo/scripts/neostorage.py
View file @
f2796d9c
...
...
@@ -30,8 +30,6 @@ parser.add_option('-d', '--database', help = 'database connections string')
parser
.
add_option
(
'-e'
,
'--engine'
,
help
=
'database engine'
)
parser
.
add_option
(
'-w'
,
'--wait'
,
help
=
'seconds to wait for backend to be '
'available, before erroring-out (-1 = infinite)'
,
type
=
'float'
,
default
=
0
)
parser
.
add_option
(
'--prune-orphan'
,
action
=
'store_true'
,
help
=
'fix database'
' by deleting unreferenced raw data, and exit (this can take a long time)'
)
parser
.
add_option
(
'--reset'
,
action
=
'store_true'
,
help
=
'remove an existing database if any, and exit'
)
...
...
@@ -55,7 +53,5 @@ def main(args=None):
# and then, load and run the application
from
neo.storage.app
import
Application
app
=
Application
(
config
)
if
config
.
getPruneOrphan
():
print
app
.
dm
.
pruneOrphan
(),
'deleted record(s)'
elif
not
config
.
getReset
():
if
not
config
.
getReset
():
app
.
run
()
neo/storage/database/manager.py
View file @
f2796d9c
...
...
@@ -14,7 +14,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
threading
from
collections
import
defaultdict
from
contextlib
import
contextmanager
from
functools
import
wraps
from
neo.lib
import
logging
,
util
from
neo.lib.exception
import
DatabaseFailure
...
...
@@ -53,6 +55,7 @@ class DatabaseManager(object):
ENGINES
=
()
_deferred
=
0
_duplicating
=
_repairing
=
None
def
__init__
(
self
,
database
,
engine
=
None
,
wait
=
0
):
"""
...
...
@@ -71,11 +74,27 @@ class DatabaseManager(object):
if
attr
==
"_getPartition"
:
np
=
self
.
getNumPartitions
()
value
=
lambda
x
:
x
%
np
el
s
e
:
el
if
self
.
_duplicating
is
Non
e
:
return
self
.
__getattribute__
(
attr
)
else
:
value
=
getattr
(
self
.
_duplicating
,
attr
)
setattr
(
self
,
attr
,
value
)
return
value
@
contextmanager
def
_duplicate
(
self
):
cls
=
self
.
__class__
db
=
cls
.
__new__
(
cls
)
db
.
_duplicating
=
self
try
:
db
.
_connect
()
finally
:
del
db
.
_duplicating
try
:
yield
db
finally
:
db
.
close
()
@
abstract
def
_parse
(
self
,
database
):
"""Called during instantiation, to process database parameter."""
...
...
@@ -424,11 +443,6 @@ class DatabaseManager(object):
aborted before vote. This method is used to reclaim the wasted space.
"""
def
pruneOrphan
(
self
):
n
=
self
.
_pruneData
(
self
.
getOrphanList
())
self
.
commit
()
return
n
@
abstract
def
_pruneData
(
self
,
data_id_list
):
"""To be overridden by the backend to delete any unreferenced data
...
...
@@ -604,6 +618,37 @@ class DatabaseManager(object):
self
.
_setTruncateTID
(
None
)
self
.
commit
()
def
repair
(
self
,
weak_app
,
dry_run
):
t
=
self
.
_repairing
if
t
and
t
.
is_alive
():
logging
.
error
(
'already repairing'
)
return
def
repair
():
l
=
threading
.
Lock
()
l
.
acquire
()
def
finalize
():
try
:
if
data_id_list
and
not
dry_run
:
self
.
commit
()
logging
.
info
(
"repair: deleted %s orphan records"
,
self
.
_pruneData
(
data_id_list
))
self
.
commit
()
finally
:
l
.
release
()
try
:
with
self
.
_duplicate
()
as
db
:
data_id_list
=
db
.
getOrphanList
()
logging
.
info
(
"repair: found %s records that may be orphan"
,
len
(
data_id_list
))
weak_app
().
em
.
wakeup
(
finalize
)
l
.
acquire
()
finally
:
del
self
.
_repairing
logging
.
info
(
"repair: done"
)
t
=
self
.
_repairing
=
threading
.
Thread
(
target
=
repair
)
t
.
daemon
=
1
t
.
start
()
@
abstract
def
getTransaction
(
self
,
tid
,
all
=
False
):
"""Return a tuple of the list of OIDs, user information,
...
...
neo/storage/handlers/__init__.py
View file @
f2796d9c
...
...
@@ -14,6 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
weakref
from
neo.lib
import
logging
from
neo.lib.handler
import
EventHandler
from
neo.lib.exception
import
PrimaryFailure
,
StoppedOperation
...
...
@@ -59,3 +60,7 @@ class BaseMasterHandler(EventHandler):
def
askFinalTID
(
self
,
conn
,
ttid
):
conn
.
answer
(
Packets
.
AnswerFinalTID
(
self
.
app
.
dm
.
getFinalTID
(
ttid
)))
def
notifyRepair
(
self
,
conn
,
*
args
):
app
=
self
.
app
app
.
dm
.
repair
(
weakref
.
ref
(
app
),
*
args
)
neo/tests/threaded/__init__.py
View file @
f2796d9c
...
...
@@ -367,7 +367,7 @@ class ServerNode(Node):
raise
ConnectorException
def
stop
(
self
):
self
.
em
.
wakeup
(
True
)
self
.
em
.
wakeup
(
thread
.
exit
)
class
AdminApplication
(
ServerNode
,
neo
.
admin
.
app
.
Application
):
pass
...
...
neo/tests/threaded/test.py
View file @
f2796d9c
...
...
@@ -17,6 +17,7 @@
import
os
import
sys
import
threading
import
time
import
transaction
import
unittest
from
thread
import
get_ident
...
...
@@ -1424,6 +1425,39 @@ class Test(NEOThreadedTest):
finally
:
cluster
.
stop
()
def
testPruneOrphan
(
self
):
cluster
=
NEOCluster
(
storage_count
=
2
,
partitions
=
2
)
try
:
cluster
.
start
()
cluster
.
importZODB
()(
3
)
bad
=
[]
ok
=
[]
def
data_args
(
value
):
return
makeChecksum
(
value
),
value
,
0
node_list
=
[]
for
i
,
s
in
enumerate
(
cluster
.
storage_list
):
node_list
.
append
(
s
.
uuid
)
if
i
:
s
.
dm
.
holdData
(
*
data_args
(
'boo'
))
ok
.
append
(
s
.
getDataLockInfo
())
for
i
in
xrange
(
3
-
i
):
s
.
dm
.
storeData
(
*
data_args
(
'!'
*
i
))
bad
.
append
(
s
.
getDataLockInfo
())
s
.
dm
.
commit
()
def
check
(
dry_run
,
expected
):
cluster
.
neoctl
.
repair
(
node_list
,
dry_run
)
for
e
,
s
in
zip
(
expected
,
cluster
.
storage_list
):
while
1
:
self
.
tic
()
if
s
.
dm
.
_repairing
is
None
:
break
time
.
sleep
(.
1
)
self
.
assertEqual
(
e
,
s
.
getDataLockInfo
())
check
(
1
,
bad
)
check
(
0
,
ok
)
check
(
1
,
ok
)
finally
:
cluster
.
stop
()
if
__name__
==
"__main__"
:
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment