Commit e83e7960 authored by Jérome Perrin

core: repair ERP5Site_reindexLatestIndexedObjects

This script stopped working when Catalog became an ERP5 document,
because it was calling the unindexObject method of the catalog (which
itself is indexable like any other ERP5 document). Update it to use
uncatalogObject, which is the method to unindex a document by uid.

Even though it was somehow working before, it was not really correct
in selecting objects, because it was using the - operator on a TIMESTAMP
column, which does not compute a difference in seconds as this script
was expecting. See for example https://stackoverflow.com/a/24504132/7607763
or the example below for an explanation. Instead, use TIMESTAMPADD
to compute the start timestamp only once and use the >= operator, which
works as expected.

This query was also sorting by indexation_timestamp, which does not
use an index. Remove the sort because it's not really needed.

Excluding the reserved path was also not needed: we no longer use these
since 69aefdff (ZSQLCatalog: Drop support for "reserved" path.,
2017-09-18)

---

Another reproduction of the timestamp arithmetic problem

select
   TIMESTAMP('2021-01-02 00:00:00') - TIMESTAMP('2021-01-01 00:00:00') a,
   20210102000000 - 20210101000000 aa,
   TIMESTAMP('2021-06-01 00:00:00') - TIMESTAMP('2021-05-31 00:00:00') b,
   20210601000000 - 20210531000000 bb,
   TIMESTAMPDIFF(second, TIMESTAMP('2021-05-31 00:00:00'), TIMESTAMP('2021-06-01 00:00:00')) c

| a | aa | b | bb | c |
| ------ | ------ | ------ | ------ | ------ |
| 1000000 | 1000000 | 70000000 | 70000000 | 86400 |
parent 9a518828
......@@ -722,3 +722,59 @@ class TestERP5Core(ERP5TypeTestCase, ZopeTestCase.Functional):
*,bar,Bar,SBar,,,3,desc
*,foo,Foo,,Rfoo,CFoo,,
""", csv_data)
def test_ERP5Site_reindexLatestIndexedObjects(self):
  """Exercise ERP5Site_reindexLatestIndexedObjects on a tampered catalog.

  Three folders are created and indexed, then their catalog rows are
  modified directly in SQL so that:
    - one row carries a title that differs from the ZODB title,
    - one row points to a path that does not exist and has an
      indexation timestamp of one day ago,
    - one row has an indexation timestamp of five days ago.

  After running the script, the first row must show the ZODB title again,
  the second row must be gone from the catalog, and the third row must
  keep the indexation timestamp it had before the script ran.
  """
  def run_sql(statement):
    # Fetch the connection on every call, exactly like inline usage would.
    self.portal.erp5_sql_connection().query(statement)

  def catalog_values(column, document):
    # Values of `column` for every catalog row matching the document's uid.
    return [
      getattr(brain, column)
      for brain in self.portal.portal_catalog(
        select_list=(column, ),
        uid=document.getUid(),
      )
    ]

  container = self.portal.newContent(portal_type='Folder', id='test_folder')
  # Document whose catalog row will be made to differ from its ZODB state.
  reindexed = container.newContent(
    portal_type='Folder', id='reindexed', title="zodb")
  # Document whose catalog row will point to a path that does not exist.
  only_in_catalog = container.newContent(
    portal_type='Folder', id='only_in_catalog')
  # Document whose catalog row will be given an older indexation timestamp
  # and that the script is expected to leave alone.
  old = container.newContent(portal_type='Folder', id='old')
  self.tic()

  # Tamper with the catalog rows directly in SQL.
  run_sql(
    'update catalog set title="catalog" where uid=%s' % reindexed.getUid())
  run_sql(
    'update catalog set path="not/exist", indexation_timestamp=subdate(current_date, 1) where uid=%s'
    % only_in_catalog.getUid())
  run_sql(
    'update catalog set indexation_timestamp=subdate(current_date, 5) where uid=%s'
    % old.getUid())
  self.tic()

  # Sanity checks: the catalog is now in the simulated state.
  self.assertEqual(catalog_values('title', reindexed), ['catalog'])
  brain, = self.portal.portal_catalog(uid=only_in_catalog.getUid())
  self.assertRaises(KeyError, brain.getObject)
  old_indexation_timestamp, = catalog_values('indexation_timestamp', old)

  output = self.portal.ERP5Site_reindexLatestIndexedObjects()
  self.assertIn(", 1 object unindexed", output)
  self.tic()

  # The catalog row that differed from ZODB shows the ZODB title again.
  self.assertEqual(catalog_values('title', reindexed), ['zodb'])
  # The row that existed only in the catalog has been removed.
  remaining = list(self.portal.portal_catalog(uid=only_in_catalog.getUid()))
  self.assertFalse(remaining)
  # The old row kept its indexation timestamp, i.e. it was not reindexed.
  self.assertEqual(
    catalog_values('indexation_timestamp', old),
    [old_indexation_timestamp])
......@@ -18,7 +18,7 @@ for candidate in candidate_list:
# Object is unreachable, remove it from catalog
# Use SQLQueue because all activities are triggered on the same object,
# and SQLDict keeps only one.
catalog.activate(activity="SQLQueue").unindexObject(uid=candidate['uid'])
catalog.activate(activity="SQLQueue").uncatalogObject(uid=candidate['uid'])
unindex_count += 1
else:
obj.reindexObject()
......
......@@ -3,7 +3,4 @@ SELECT
FROM
catalog
WHERE
path != 'reserved'
AND CURRENT_TIMESTAMP - indexation_timestamp <= <dtml-sqlvar delta type="int">
ORDER BY
indexation_timestamp DESC
\ No newline at end of file
indexation_timestamp >= TIMESTAMPADD(SECOND, - <dtml-sqlvar delta type="int">, CURRENT_TIMESTAMP)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment