Commit dd1d6b30 authored by Julien Muchembled

storage: new --prune-orphan option

parent ccbf7bce
...@@ -130,6 +130,10 @@ class ConfigurationManager(object): ...@@ -130,6 +130,10 @@ class ConfigurationManager(object):
# only from command line # only from command line
return self.argument_list.get('reset', False) return self.argument_list.get('reset', False)
def getPruneOrphan(self):
    """Tell whether the 'prune_orphan' option was given.

    The value is read from the command-line arguments only and
    defaults to False when the option is absent.
    """
    options = self.argument_list
    return options.get('prune_orphan', False)
def getUUID(self): def getUUID(self):
# only from command line # only from command line
uuid = self.argument_list.get('uuid', None) uuid = self.argument_list.get('uuid', None)
......
...@@ -30,6 +30,8 @@ parser.add_option('-d', '--database', help = 'database connections string') ...@@ -30,6 +30,8 @@ parser.add_option('-d', '--database', help = 'database connections string')
parser.add_option('-e', '--engine', help = 'database engine') parser.add_option('-e', '--engine', help = 'database engine')
parser.add_option('-w', '--wait', help='seconds to wait for backend to be ' parser.add_option('-w', '--wait', help='seconds to wait for backend to be '
'available, before erroring-out (-1 = infinite)', type='float', default=0) 'available, before erroring-out (-1 = infinite)', type='float', default=0)
# Repair switch: delete raw data that is no longer referenced, then exit.
parser.add_option(
    '--prune-orphan',
    action='store_true',
    help='fix database by deleting unreferenced raw data, and exit'
         ' (this can take a long time)')
parser.add_option('--reset', action='store_true', parser.add_option('--reset', action='store_true',
help='remove an existing database if any, and exit') help='remove an existing database if any, and exit')
...@@ -53,5 +55,7 @@ def main(args=None): ...@@ -53,5 +55,7 @@ def main(args=None):
# and then, load and run the application # and then, load and run the application
from neo.storage.app import Application from neo.storage.app import Application
app = Application(config) app = Application(config)
if not config.getReset(): if config.getPruneOrphan():
print app.dm.pruneOrphan(), 'deleted record(s)'
elif not config.getReset():
app.run() app.run()
...@@ -304,7 +304,7 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -304,7 +304,7 @@ class ImporterDatabaseManager(DatabaseManager):
getPartitionTable changePartitionTable getPartitionTable changePartitionTable
getUnfinishedTIDDict dropUnfinishedData abortTransaction getUnfinishedTIDDict dropUnfinishedData abortTransaction
storeTransaction lockTransaction unlockTransaction storeTransaction lockTransaction unlockTransaction
storeData _pruneData deferCommit storeData getOrphanList _pruneData deferCommit
""".split(): """.split():
setattr(self, x, getattr(self.db, x)) setattr(self, x, getattr(self.db, x))
......
...@@ -409,6 +409,20 @@ class DatabaseManager(object): ...@@ -409,6 +409,20 @@ class DatabaseManager(object):
is always the case at tpc_vote. is always the case at tpc_vote.
""" """
@abstract
def getOrphanList(self):
    """Return the list of data ids that are not referenced by the obj table

    This is a repair method, and it is usually expensive.

    There was a bug that did not free the data of transactions that were
    aborted before vote. This method is used to find that data so that
    the wasted space can be reclaimed.
    """
def pruneOrphan(self):
    """Delete all orphan data, commit, and return what _pruneData returned.

    The ids to delete come from getOrphanList() and are handed to
    _pruneData(); the change is committed before returning.
    """
    orphan_id_list = self.getOrphanList()
    deleted_count = self._pruneData(orphan_id_list)
    self.commit()
    return deleted_count
@abstract @abstract
def _pruneData(self, data_id_list): def _pruneData(self, data_id_list):
"""To be overridden by the backend to delete any unreferenced data """To be overridden by the backend to delete any unreferenced data
...@@ -417,6 +431,8 @@ class DatabaseManager(object): ...@@ -417,6 +431,8 @@ class DatabaseManager(object):
- not in self._uncommitted_data - not in self._uncommitted_data
- and not referenced by a fully-committed object (storage should have - and not referenced by a fully-committed object (storage should have
an index or a refcount of all data ids of all objects) an index or a refcount of all data ids of all objects)
The returned value is the number of deleted rows from the data table.
""" """
@abstract @abstract
......
...@@ -471,6 +471,11 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -471,6 +471,11 @@ class MySQLDatabaseManager(DatabaseManager):
_structLL = struct.Struct(">LL") _structLL = struct.Struct(">LL")
_unpackLL = _structLL.unpack _unpackLL = _structLL.unpack
def getOrphanList(self):
    """Return the ids of 'data' rows that no 'obj' row refers to."""
    # The LEFT JOIN leaves data_id NULL for every data row that has no
    # matching obj row, which is exactly what the WHERE clause keeps.
    sql = ("SELECT id FROM data LEFT JOIN obj ON (id=data_id)"
           " WHERE data_id IS NULL")
    # Each row is expected to be a 1-tuple holding the data id.
    return [data_id for (data_id,) in self.query(sql)]
def _pruneData(self, data_id_list): def _pruneData(self, data_id_list):
data_id_list = set(data_id_list).difference(self._uncommitted_data) data_id_list = set(data_id_list).difference(self._uncommitted_data)
if data_id_list: if data_id_list:
...@@ -491,6 +496,8 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -491,6 +496,8 @@ class MySQLDatabaseManager(DatabaseManager):
if bigid_list: if bigid_list:
q("DELETE FROM bigdata WHERE id IN (%s)" q("DELETE FROM bigdata WHERE id IN (%s)"
% ",".join(map(str, bigid_list))) % ",".join(map(str, bigid_list)))
return len(id_list)
return 0
def _bigData(self, value): def _bigData(self, value):
bigdata_id, length = self._unpackLL(value) bigdata_id, length = self._unpackLL(value)
......
...@@ -376,6 +376,11 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -376,6 +376,11 @@ class SQLiteDatabaseManager(DatabaseManager):
packed, buffer(''.join(oid_list)), packed, buffer(''.join(oid_list)),
buffer(user), buffer(desc), buffer(ext), u64(ttid))) buffer(user), buffer(desc), buffer(ext), u64(ttid)))
def getOrphanList(self):
    """Return the ids of 'data' rows that no 'obj' row refers to."""
    # Rows of 'data' without a matching 'obj' row come out of the
    # LEFT JOIN with a NULL data_id; only those are selected.
    orphan_query = ("SELECT id FROM data LEFT JOIN obj ON (id=data_id)"
                    " WHERE data_id IS NULL")
    orphan_id_list = []
    for (data_id,) in self.query(orphan_query):
        orphan_id_list.append(data_id)
    return orphan_id_list
def _pruneData(self, data_id_list): def _pruneData(self, data_id_list):
data_id_list = set(data_id_list).difference(self._uncommitted_data) data_id_list = set(data_id_list).difference(self._uncommitted_data)
if data_id_list: if data_id_list:
...@@ -385,6 +390,8 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -385,6 +390,8 @@ class SQLiteDatabaseManager(DatabaseManager):
% ",".join(map(str, data_id_list)))) % ",".join(map(str, data_id_list))))
q("DELETE FROM data WHERE id IN (%s)" q("DELETE FROM data WHERE id IN (%s)"
% ",".join(map(str, data_id_list))) % ",".join(map(str, data_id_list)))
return len(data_id_list)
return 0
def storeData(self, checksum, data, compression, def storeData(self, checksum, data, compression,
_dup=unique_constraint_message("data", "hash", "compression")): _dup=unique_constraint_message("data", "hash", "compression")):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment