from __future__ import absolute_import
##############################################################################
#
# Copyright (c) 2008-2009 Nexedi SARL and Contributors. All Rights Reserved.
#                    Jean-Paul Smets-Solanes <jp@nexedi.com>
#                    Vincent Pelletier <vincent@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

import re
import itertools
from zLOG import LOG, WARNING, INFO
from .interfaces.column_map import IColumnMap
from zope.interface.verify import verifyClass
from zope.interface import implementer
from Products.ZSQLCatalog.interfaces.column_map import IColumnMap
from Products.ZSQLCatalog.TableDefinition import (PlaceHolderTableDefinition,
                                                  TableAlias,
                                                  InnerJoin,
                                                  LeftJoin)
import six
43 44 45

DEFAULT_GROUP_ID = None

46
MAPPING_TRACE = False
47 48 49 50 51 52 53

# TODO: handle left joins
# TODO: handle straight joins
# TODO: make it possible to do: query=ComplexQuery(Query(source_title='foo'), Query(source_title='bar')), sort_on=[('source_title_1', )]
#       currently, it's not possible because related_key_dict is indexed by related key name, which makes 'source_title_1' lookup fail. It should be indexed by group (probably).
# TODO: rename all "related_key" references into "virtual_column"

54 55
# Matches a trailing " AS <alias>" (case-insensitive) located at the end of a
# line, as long as the alias part contains no closing parenthesis.
# Raw string: "\s" is not a valid escape sequence in a regular string literal.
re_sql_as = re.compile(r"\s+AS\s[^)]+$", re.IGNORECASE | re.MULTILINE)

56
@implementer(IColumnMap)
57 58
class ColumnMap(object):

59 60 61
  def __init__(self,
               catalog_table_name=None,
               table_override_map=None,
               left_join_list=None,
               inner_join_list=None,
               implicit_join=False):
    """
      catalog_table_name (string, None)
        Name of the main catalog table. When None, build() does nothing and
        asSQLColumn() returns raw columns unchanged.
      table_override_map (dict, None)
        Key: table name
        Value: expression to use instead of the table name when making table
        alias definitions (see _getTableOverride).
      left_join_list (list of strings, None)
        Columns for which a left join is requested.
      inner_join_list (list of strings, None)
        Columns for which an inner join is requested. Must not intersect
        with left_join_list.
      implicit_join (bool)
        Force implicit joins. Incompatible with a non-empty left_join_list.
    """
    # None (the default) means "no column": normalise so that membership
    # tests and set() conversions done here and in other methods do not
    # raise TypeError.
    if left_join_list is None:
      left_join_list = []
    if inner_join_list is None:
      inner_join_list = []
    self.catalog_table_name = catalog_table_name
    # Key: group
    # Value: set of column names
    self.registry = {}
    # Key: group
    # Value: dict
    #  Key: column name
    #  Value: set of SimpleQuery
    self.simple_query_dict = {}
    # Key: (group, column name)
    # Value: table name
    self.column_map = {}
    # Key: (group, table name)
    # Value: table alias
    self.table_alias_dict = {}
    # Key: related key name
    # Value: (group, column name)
    self.related_key_dict = {}
    # Key: related_column
    # Value: last used alias order
    self.related_key_order_dict = {}
    # Key: group
    # Value: relate_key
    self.related_group_dict = {}
    # Key: table alias
    # Value: table name
    self.table_map = {}
    # Key: raw column
    # Value: (function, column)
    self.raw_column_dict = {}
    # Entries: column name
    self.column_ignore_set = set()
    # Key: (group, table name)
    # Value: set of columns requiring that table to be joined
    self.join_table_map = {}
    # BBB: Remove join_query_list and its uses when all RelatedKey
    # methods have been converted to properly return each Join
    # condition separately, and all uses of catalog's from_expression
    # have been removed.
    self.join_query_list = []
    self.table_override_map = table_override_map or {}
    self.table_definition = PlaceHolderTableDefinition()
    # We need to keep track of the original definition to do inner joins on it
    self._inner_table_definition = self.table_definition
    self.left_join_list = left_join_list
    self.implicit_join = implicit_join
    assert not (self.implicit_join and self.left_join_list), (
      "Cannot do left_joins while forcing implicit join"
    )
    self.inner_join_list = inner_join_list
    assert not set(left_join_list).intersection(inner_join_list), (
      "left_join_list and inner_join_list intersect"
    )
116 117

  def registerColumn(self, raw_column, group=DEFAULT_GROUP_ID, simple_query=None):
    """
      Register a column as being used in given group.

      raw_column (string)
        Column expression. It may be wrapped in a single function call
        ("COUNT(foo)"), be preceded by other words ("DISTINCT foo"), contain
        backticks and be prefixed by a table name ("catalog.uid").
        It must not contain " as ".
      group (string, None)
        Group the column is used in.
      simple_query
        Stored next to the column in simple_query_dict.
    """
    assert ' as ' not in raw_column.lower(), raw_column
    # Sanitize input: extract column from raw column (might contain COUNT, ...).
    # XXX This is not enough to parse something like:
    # GROUP_CONCAT(DISTINCT foo ORDER BY bar)
    function = None
    column_name = raw_column
    if '(' in raw_column:
      function, column_name = raw_column.split('(')
      column_name = column_name.strip()
      assert column_name[-1] == ')', column_name
      column_name = column_name[:-1].strip()
    # Keep only the last word (drops 'DISTINCT ' etc.) and strip backticks.
    column_name = column_name.split()[-1].replace('`', '')
    # Extract table name from column, if any.
    table_name = None
    if '.' in column_name:
      # Assuming the part before the dot is a real table name, not an alias.
      table_name, column_name = column_name.split('.')

    self.raw_column_dict[raw_column] = (function, column_name)
    self.registry.setdefault(group, set()).add(column_name)
    group_query_dict = self.simple_query_dict.setdefault(group, {})
    group_query_dict.setdefault(column_name, set()).add(simple_query)
    if table_name is None:
      return
    # Register table alias and mark column as resolved.
    self.registerTable(table_name, alias=table_name, group=group)
    self.resolveColumn(column_name, table_name, group=group)
    if group is DEFAULT_GROUP_ID and table_name != self.catalog_table_name:
      # When a column is registered in default group and is explicitly
      # mapped to a table, we must mark its table as requiring a join with
      # catalog table (unless it's the catalog table, of course).
      self._addJoinTableForColumn(table_name, table_name + "." + column_name, group)
153 154 155 156 157 158 159 160 161 162 163 164

  def ignoreColumn(self, column):
    self.column_ignore_set.add(column)

  def registerRelatedKey(self, related_column, column):
    # XXX: should we store the group, or directly the table on which the column is mapped ?
    # The former avoids duplicating data, but requires one more lookup (group + column -> table)
    # The latter makes it harder (?) to split the mapping in multiple queries (if splitting by groups turns out to be a good idea)
    real_related_column = related_column
    order = self.related_key_order_dict.get(real_related_column, 0) + 1
    related_column = '%s_%s' % (related_column, order)
    group = 'related_%s' % (related_column, )
165 166 167
    assert group not in self.registry, (group, self.registry)
    assert group not in self.related_group_dict, (group,
      self.related_group_dict)
168 169 170 171 172 173 174
    self.related_key_order_dict[real_related_column] = order
    self.related_key_dict[real_related_column] = (group, column)
    self.registerColumn(column, group=group)
    self.related_group_dict[group] = related_column
    return group

  def registerRelatedKeyColumn(self, related_column, position, group):
175
    assert group in self.related_group_dict, (group, self.related_group_dict)
176
    group = self.getRelatedKeyGroup(position, group)
177 178
    assert group not in self.related_group_dict, (group,
      self.related_group_dict)
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
    self.related_group_dict[group] = related_column
    return group

  def getRelatedKeyGroup(self, position, group):
    """
      Return the name of the sub-group of given group at given position.
    """
    return '%(group)s_column_%(position)s' % {
      'group': group,
      'position': position,
    }

  def registerTable(self, table_name, alias=None, group=DEFAULT_GROUP_ID):
    """
      Declare that given table is used in given group, optionally with an
      alias.

      table_name (string)
        Table name.
      alias (string, None)
        Alias to use for this table in this group, if already decided.
      group (string, None)
        Group the table is used in.

      Raises ValueError when the table already has a different alias in
      this group.
    """
    table_alias_key = (group, table_name)
    known_alias = self.table_alias_dict.get(table_alias_key)
    if known_alias is not None:
      # An alias is already known: accept no alias or the very same alias,
      # refuse anything else.
      if alias is None or alias == known_alias:
        return
      raise ValueError(
        "Table %r for group %r is aliased as %r, can't alias it now as %r"
        % (table_name, group, known_alias, alias))
    # No alias known yet: store whatever was given, and resolve the table
    # when an actual alias was provided.
    self.table_alias_dict[table_alias_key] = alias
    if alias is not None:
      self.resolveTable(table_name, alias, group=group)
201 202 203 204 205 206

  def _mapColumns(self, column_table_map, table_usage_dict, column_name_set, group, vote_result_dict):
    """
      Choose a table for each column of column_name_set in given group, then
      register the chosen tables and resolve the columns on them.

      column_table_map (dict)
        Key: column name
        Value: tables on which the column exists (only its length is used
        here).
      table_usage_dict (dict)
        Key: table name
        Value: set of column names of this group available on that table.
        Those sets are modified in place.
      column_name_set (set)
        Columns which still need to be mapped. Modified in place: mapped
        columns are removed from it.
      group
        Group being mapped.
      vote_result_dict (dict)
        Key: column name
        Value: dict
         Key: table name
         Value: score

      Raises ValueError when some columns could not be mapped.
    """
    mapping_dict = {}
    catalog_table_name = self.catalog_table_name

    # Map all columns to tables decided by vote.
    for column_name, candidate_dict in six.iteritems(vote_result_dict):
      # candidate_dict is never empty
      max_score = 0
      for table_name, score in six.iteritems(candidate_dict):
        if score > max_score:
          max_score = score
          # best_count counts the other tables sharing the current best score.
          best_count = 0
          best_choice = table_name
        elif score == max_score:
          best_count += 1
      if best_count:
        LOG('ColumnMap', WARNING, 'Mapping vote led to a tie. Mapping to %r' % (best_choice, ))
      if MAPPING_TRACE:
        LOG('ColumnMap', INFO, 'Mapping by vote %r to %r' % (column_name, best_choice))
      mapping_dict[column_name] = best_choice
      column_name_set.remove(column_name)
      # The column is now bound to best_choice: stop counting it as usable on
      # any other table.
      for table_name, column_set in six.iteritems(table_usage_dict):
        if table_name != best_choice:
          column_set.discard(column_name)

    # Map all remaining columns.
    def table_weight(a):
      """
        Compute each table weight.

        a is a (table name, column set) pair. Tables already registered in
        this group weigh the most, then the catalog table, then other tables
        ordered by their number of columns.
      """
      if (group, a[0]) in self.table_alias_dict:
        result = (2, )
      elif a[0] == catalog_table_name:
        result = (1, )
      else:
        result = (0, len(a[1]))
      return result
    # Sort table list by ascending weight: pop() below takes the heaviest
    # table first.
    weighted_table_list = sorted(six.iteritems(table_usage_dict), key=table_weight)
    while len(weighted_table_list):
      table_name, column_set = weighted_table_list.pop()
      if len(column_set):
        common_column_set = column_name_set.intersection(column_set)
        if len(common_column_set):
          # Only allow usage of this table if any of those is true:
          # - current table is the catalog  (if any catalog was provided)
          # - there are column used on that table which are already mapped
          #   (this does not include columns mapped by this code)
          #   If columns are mapped to this table in current group, then using
          #   it will not require a new join, so it should be allowed.
          #   Note: it would be good to take indexes into account when there
          #   are multiple candidate tables.
          # - any of those columns belongs exclusively to this table
          #   Although the list of tables those columns belong to is known
          #   earlier (in "build"), mapping them here
          #   - avoids code duplication (registerTable, resolveColumn,
          #     _addJoinTableForColumn)
          #   - offers user to vote for an unknown table, overriding this
          #     forced mapping.
          use_allowed = table_name == catalog_table_name or \
                        len(common_column_set) < len(column_set)
          if not use_allowed:
            for column_name in column_set:
              if len(column_table_map.get(column_name, [])) == 1:
                # There is no alternative, mark as required
                use_allowed = True
                break
          if use_allowed:
            for column_name in common_column_set:
              if MAPPING_TRACE:
                LOG('ColumnMap', INFO, 'Mapping by default %r to %r' % \
                    (column_name, table_name))
              mapping_dict[column_name] = table_name
              # This column must not be resolved any longer
              column_name_set.remove(column_name)
              # Remove this column from sets containing it. This prevents from
              # giving a high score to a table which columns would already have
              # been mapped to another table.
              for ignored, other_column_set in weighted_table_list:
                other_column_set.discard(column_name)
            # Column sets changed, so weights may have changed too.
            weighted_table_list.sort(key=table_weight)
          else:
            # All column which are mappable on that table are to-be-mapped
            # columns. This means that this table was not explicitly used, and
            # as each table contain a different amount of lines, we should not
            # join with any non-explicit table. Hence, we skip this mapping.
            LOG('ColumnMap', INFO, 'Skipping possible map of %r on %r as that table' \
                ' is not explicitely used.' % (common_column_set, table_name))

    # Detect incomplete mappings
    if column_name_set:
      raise ValueError('Could not map those columns: %r' % column_name_set)

    # Do the actual mapping
    for column_name, table_name in six.iteritems(mapping_dict):
      # Mark this column as resolved
      if MAPPING_TRACE:
        LOG('ColumnMap', INFO, 'Mapping column %s to table %s' % (column_name, table_name))
      self.registerTable(table_name, group=group)
      self.resolveColumn(column_name, table_name, group=group)
      if table_name != catalog_table_name:
        self._addJoinTableForColumn(table_name, column_name, group)
304 305

  def build(self, sql_catalog):
    """
      Map every registered column to a table, give an alias to every
      registered table, and compute the joins this requires.

      sql_catalog
        Object providing getColumnMap, sql_catalog_table_vote_scripts,
        getRelatedKeyDefinition and getSearchKey.

      Does nothing when no catalog table name was provided.
    """
    join_query_to_build_list = []
    catalog_table_name = self.catalog_table_name
    if catalog_table_name is None:
      return

    column_table_map = sql_catalog.getColumnMap()
    table_vote_method_list = [getattr(sql_catalog, x) for x in sql_catalog.sql_catalog_table_vote_scripts]

    # Generate missing joins from default group (this is required to allow using related keys outside of queries: order_by, sort_on, ...)
    column_set = self.registry.get(DEFAULT_GROUP_ID, [])
    for column_name in column_set:
      if column_name not in column_table_map and column_name not in self.related_key_dict:
        related_key_definition = sql_catalog.getRelatedKeyDefinition(column_name)
        if related_key_definition is not None:
          join_query = sql_catalog.getSearchKey(column_name, 'RelatedKey').buildQuery(sql_catalog=sql_catalog, related_key_definition=related_key_definition)
          join_query.registerColumnMap(sql_catalog, self)
          # Its SQL expression is built at the end of this method, once all
          # table aliases are known.
          join_query_to_build_list.append(join_query)

    # List all possible tables, with all used column for each
    for group, column_set in six.iteritems(self.registry):
      # unique needed column name set
      column_name_set = set()
      # table -> column_set, including alternatives
      table_usage_dict = {}

      for column_name in column_set:
        if column_name == '*' or column_name in self.column_ignore_set:
          continue
        table_name_list = column_table_map.get(column_name, [])
        if len(table_name_list) == 0:
          # Related keys used in default group are expected not to be real
          # columns, so no warning is emitted for them.
          if not(group is DEFAULT_GROUP_ID and column_name in self.related_key_dict):
            LOG('ColumnMap', WARNING, 'Not a known column name: %r' % (column_name, ))
          continue
        column_map_key = (group, column_name)
        if column_map_key in self.column_map:
          # Column is already mapped, so we must count this column as being available only on that table. Its mapping will not change, and it will impact table schema choice.
          table_name = self.column_map[column_map_key]
          assert table_name in table_name_list, '%r not in %r' % (table_name, table_name_list)
          table_name_list = [table_name]
        else:
          # Mark this column as requiring to be mapped.
          column_name_set.add(column_name)
        for table_name in table_name_list:
          table_usage_dict.setdefault(table_name, set()).add(column_name)
      # XXX: mutable datatypes are provided to vote method. if it modifies
      # them, it can introduce mapping bugs. Copying them might be costly,
      # especially if done before each call, since they also contain mutable
      # types.
      # XXX: the API of vote methods is not stable yet. Parameters should
      # always be passed and expected by name, to make it less painful to
      # change API.
      # XXX: there is no check that the table voted for contains mapped
      # column. It is up to the user not to do stupid things.
      # Key: column name
      # Value: dict
      #  Key: table name
      #  Value: number of vote methods which voted for that table
      vote_result_dict = {}
      simple_query_dict = self.simple_query_dict[group]
      for table_vote_method in table_vote_method_list:
        vote_dict = table_vote_method(column_name_set=column_name_set,
                                      simple_query_dict=simple_query_dict,
                                      table_usage_dict=table_usage_dict,
                                      group=group)
        if isinstance(vote_dict, dict):
          for column, table in six.iteritems(vote_dict):
            if column in column_name_set:
              column_vote_dict = vote_result_dict.setdefault(column, {})
              column_vote_dict[table] = column_vote_dict.get(table, 0) + 1
            else:
              LOG('ColumnMap', WARNING, 'Vote script %r voted for a ' \
                  'non-candidate column: %r, candidates are: %r. Ignored.' %
                  (table_vote_method, column, column_name_set))
        else:
          LOG('ColumnMap', WARNING, 'Vote script %r returned invalid data: %r. ' \
              'Ignored.' % (table_vote_method, vote_dict))
      self._mapColumns(column_table_map, table_usage_dict, column_name_set, group, vote_result_dict)

    # Key: base alias name
    # Value: next number to try as a suffix for that base alias name
    table_alias_number_dict = {}

    for (group, table_name), alias in six.iteritems(self.table_alias_dict):
      if alias is None:
        # No alias was decided for this table yet: generate one which is
        # not used by any other table.
        if group in self.related_group_dict:
          alias_table_name = 'related_%s_%s' % (self.related_group_dict[group], table_name)
        else:
          alias_table_name = table_name
        table_alias_number = table_alias_number_dict.get(alias_table_name, 0)
        while True:
          if table_alias_number == 0:
            alias = alias_table_name
          else:
            alias = '%s_%s' % (alias_table_name, table_alias_number)
          table_alias_number += 1
          if alias not in self.table_map:
            break
        table_alias_number_dict[alias_table_name] = table_alias_number
      self.resolveTable(table_name, alias, group=group)

    # now that we have all aliases, calculate missing joins coming from
    # non-RelatedKey relationships (like full_text).
    self._calculateMissingJoins()
    # and all left joins that did not come from explicit queries
    # (i.e. joins coming from 'sort_on', 'select_dict', etc.)
    for join_query in join_query_to_build_list:
      # XXX ugly use of inner attribute of join_query. Please Refactor:
      # search_keys don't actually return SQLExpressions, but they add
      # join definitions in the column_map
      join_query.search_key.buildSQLExpression(sql_catalog=sql_catalog,
                                               column_map=self,
                                               only_group_columns=False,
                                               group=join_query.group,)
    if MAPPING_TRACE:
      # Key: group
      # Value: 2-tuple
      #  dict
      #   Key: column
      #   Value: table name
      #  dict
      #   Key: table name
      #   Value: table alias
      summary_dict = {}
      for (group, column), table_name in six.iteritems(self.column_map):
        column_dict = summary_dict.setdefault(group, ({}, {}))[0]
        assert column not in column_dict, '%r in %r' % (column, column_dict)
        column_dict[column] = table_name
      for (group, table_name), table_alias in six.iteritems(self.table_alias_dict):
        table_dict = summary_dict.setdefault(group, ({}, {}))[1]
        assert table_name not in table_dict, '%r in %r' % (table_name, table_dict)
        table_dict[table_name] = table_alias
      for group, (column_dict, table_dict) in six.iteritems(summary_dict):
        LOG('ColumnMap', INFO, 'Group %r:' % (group, ))
        LOG('ColumnMap', INFO, ' Columns:')
        for column, table_name in six.iteritems(column_dict):
          LOG('ColumnMap', INFO, '  %r from table %r' % (column, table_name))
        LOG('ColumnMap', INFO, ' Tables:')
        for table_name, table_alias in six.iteritems(table_dict):
          LOG('ColumnMap', INFO, '  %r as %r' % (table_name, table_alias))
439 440

  def asSQLColumn(self, raw_column, group=DEFAULT_GROUP_ID):
    """
      Render given raw column as SQL, using mapped table aliases.

      raw_column (string)
        Column as it was given to registerColumn, optionally followed by
        '__score__'.
      group (string, None)
        Group in which the column is used.

      Raw columns containing a dot or a star are returned unchanged (with
      dots replaced by underscores for score columns), as is every raw
      column when there is no catalog table.
      Raises KeyError when the column is not mapped and not in the ignore
      set.
    """
    score_suffix = '__score__'
    is_score = raw_column.endswith(score_suffix)
    if self.catalog_table_name is None or '.' in raw_column or '*' in raw_column:
      if is_score:
        return raw_column.replace('.', '_')
      return raw_column
    column_suffix = ''
    if is_score:
      column_suffix = score_suffix
      raw_column = raw_column[:-len(score_suffix)]
    function, column = self.raw_column_dict.get(raw_column, (None, raw_column))
    if group is DEFAULT_GROUP_ID:
      # NOTE(review): when column is not a related key, this falls back on
      # raw_column rather than on the function-stripped column - confirm
      # this is intended.
      group, column = self.related_key_dict.get(column, (group, raw_column))
    try:
      table_name = self.column_map[(group, column)]
    except KeyError:
      if raw_column not in self.column_ignore_set:
        raise
      # Ignored columns are rendered as they were given.
      result = raw_column
    else:
      table_alias = self.table_alias_dict[(group, table_name)]
      if column_suffix:
        result = '%s_%s%s' % (table_alias, column, column_suffix)
      else:
        result = '`%s`.`%s`' % (table_alias, column)
    if function is None:
      return result
    return '%s(%s)' % (function, result)

  def getCatalogTableAlias(self, group=DEFAULT_GROUP_ID):
    """
      Return the alias of the catalog table in given group.

      Raises KeyError if the catalog table is not registered in that group.
    """
    catalog_alias_key = (group, self.catalog_table_name)
    return self.table_alias_dict[catalog_alias_key]

472 473
  def _isBackwardCompatibilityRequired(self):
    return bool(
474 475 476 477
      # if they explicitly ask for implicit
      self.implicit_join or
      # if they don't pass a catalog alias, we cannot do explicit joins
      not self._setMinimalTableDefinition() or
478 479 480
      # If one or more RelatedKey methods weren't converted, we'll get
      # queries for an implicit inner join, so we have to do all joins
      # as implicit.
481
      self.join_query_list or
482 483 484 485
      # for now, work in BW compat mode if a table_override
      # is passed.  It only works for simple subselect
      # definitions anyway, and it's being used primarily
      # for writing left-joins manually.
486
      self.table_override_map)
487

488
  def getTableAliasDict(self):
489 490
    if self._isBackwardCompatibilityRequired():
      # BBB: Using implicit joins or explicit from_expression
491 492 493
      return self.table_map.copy()
    else:
      return None
494 495

  def resolveColumn(self, column, table_name, group=DEFAULT_GROUP_ID):
    """
      Map given column of given group to given table.

      column (string)
        Column name. Must already be registered in group.
      table_name (string)
        Table name. Must already be registered in group.
      group (string, None)
        Group in which the column is used.

      Raises ValueError when the column is already mapped to another table,
      except for "uid" for which the attempt is logged and ignored.
    """
    assert group in self.registry, (group, self.registry)
    assert column in self.registry[group], (column, group,
      self.registry[group])
    assert table_name
    assert column
    column_map_key = (group, column)
    assert (group, table_name) in self.table_alias_dict, (group, table_name,
      self.table_alias_dict)
    mapped_table_name = self.column_map.get(column_map_key)
    if mapped_table_name is None:
      self.column_map[column_map_key] = table_name
      return
    if mapped_table_name == table_name:
      # Already mapped to that very table, nothing to do.
      return
    if column != 'uid':
      raise ValueError('Cannot remap a column to another table. column_map[%r] = %r, new = %r' % (column_map_key, mapped_table_name, table_name))
    LOG('ColumnMap', WARNING, 'Attempt to remap uid from %r to %r ignored.' % (mapped_table_name, table_name))
513 514 515

  def resolveTable(self, table_name, alias, group=DEFAULT_GROUP_ID):
    """
      Set the alias of given table in given group.

      table_name (string)
        Table name. Must already be registered in group, without alias or
        with the same alias.
      alias (string)
        Alias to use. Must not already be used for another table.
      group (string, None)
        Group in which the table is used.
    """
    alias_dict = self.table_alias_dict
    table_map = self.table_map
    table_alias_key = (group, table_name)
    assert table_alias_key in alias_dict, (table_alias_key,
      alias_dict)
    assert alias_dict[table_alias_key] in (None, alias), (
      table_alias_key, alias_dict[table_alias_key], alias)
    alias_dict[table_alias_key] = alias
    assert table_map.get(alias) in (None, table_name), (alias,
      table_map.get(alias), table_name)
    table_map[alias] = table_name

  def getTableAlias(self, table_name, group=DEFAULT_GROUP_ID):
    """
      Return the alias of given table in given group.

      Raises KeyError if the table is not registered in that group.
    """
    table_alias_key = (group, table_name)
    return self.table_alias_dict[table_alias_key]

528 529 530 531 532 533 534
  def _addJoinQueryForColumn(self, column, query):
    # BBB: This is a backward compatibility method that will be
    # removed in the future, when all related key methods have been adapted
    # to provide all Join conditions separately
    if column in self.left_join_list:
      raise RuntimeError('Left Join requested for column: %r, but rendered '
                         'join query is not compatible and would result in an '
535
                         'Implicit Inner Join:\n%s' %
536
                         (column, query,))
537 538 539
    self.join_query_list.append(query)

  def iterJoinQueryList(self):
540 541 542
    if self._isBackwardCompatibilityRequired():
      # Return all join queries for implicit join, and all the other
      # queries we were using to build explicit joins, but won't be able to.
543 544 545
      return itertools.chain(self.join_query_list,
                             self.table_definition.getJoinConditionQueryList())
    return []
546

547

548
  def _addJoinTableForColumn(self, table_name, column, group=DEFAULT_GROUP_ID):
    """
      Declare given table as requiring to be joined with catalog table on uid.

      table_name (string)
        Table name.
      column (string)
        Column for which the table is needed.
      group (string)
        Group id of given table.
    """
    catalog_table = self.catalog_table_name
    if catalog_table is None:
      # Only join tables when there is a catalog table
      return
    # Register unconditionally catalog table
    self.registerTable(catalog_table)
    default_column_set = self.registry.get(DEFAULT_GROUP_ID, ())
    if 'uid' not in default_column_set:
      # Register uid column if it is not already
      self.registerColumn('uid')
      self.resolveColumn('uid', catalog_table)
    join_table_key = (group, table_name)
    self.join_table_map.setdefault(join_table_key, set()).add(column)
567 568 569

  def getJoinTableAliasList(self):
    return [self.getTableAlias(table_name, group=group)
570
            for (group, table_name) in self.join_table_map.keys()]
571

572 573 574 575 576 577 578 579
  def _getTableOverride(self, table_name):
    # self.table_override_map is a dictionary mapping table names to
    # strings containing aliases of arbitrary table definitions
    # (including subselects). So we split the alias and discard it
    # since we do our own aliasing.
    table_override_w_alias = self.table_override_map.get(table_name)
    if table_override_w_alias is None:
      return table_name
580 581
    # XXX move the cleanup of table alias overrides to EntireQuery
    # class or ZSQLCatalog, so we don't need SQL syntax knowledge in
582
    # ColumnMap.
583 584 585
    #
    # Normalise the AS sql keyword to remove the last
    # aliasing in the string if present. E.g.:
586 587
    #
    # '(SELECT sub_catalog.*
588 589 590 591 592
    #   FROM catalog AS sub_catalog
    #   WHERE sub_catalog.parent_uid=183) AS catalog'
    #
    # becomes:
    #
593
    # '(SELECT sub_catalog.*
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
    #   FROM catalog AS sub_catalog
    #   WHERE sub_catalog.parent_uid=183)'
    table_override, removed = re_sql_as.subn('', table_override_w_alias)
    assert removed < 2, ('More than one table aliasing was removed from %r' %
                        table_override_w_alias)
    if removed:
      LOG('ColumnMap', WARNING,
          'Table overrides should not contain aliasing: %r' % table_override)
    return table_override

  def makeTableAliasDefinition(self, table_name, table_alias):
    """Make a table alias, giving a chance to ColumnMap to override
    the original table definition with another expression.

    Both the (possibly overridden) table name and the alias must be
    defined."""
    overridden_table_name = self._getTableOverride(table_name)
    assert overridden_table_name and table_alias, (
      "table_name (%r) and table_alias (%r) "
      "must both be defined" %
      (overridden_table_name, table_alias))
    return TableAlias(overridden_table_name, table_alias)

  def _setMinimalTableDefinition(self):
    """ Set a minimal table definition: the main catalog alias

    We don't do this at __init__ because we have neither the catalog
    table name nor its intended alias at that point.
    """
    inner_def = self._inner_table_definition
    if inner_def.table_definition is None:
      try:
        catalog_table_alias = self.getCatalogTableAlias()
      except KeyError:
624 625 626
        LOG('ColumnMap', WARNING,
            '_setMinimalTableDefinition called but the main catalog has not '
            'yet received an alias!')
627 628 629 630 631 632
        return False
      inner_def.replace(self.makeTableAliasDefinition(self.catalog_table_name,
                                                      catalog_table_alias))
    return True

  def getTableDefinition(self):
633
    if self._isBackwardCompatibilityRequired():
634 635 636 637
      # BBB: One of the RelatedKeys registered an implicit join, do
      # not return a table definition, self.getTableAliasDict() should
      # be used instead
      return None
638
    self.table_definition.checkTableAliases()
639 640 641 642 643 644
    return self.table_definition

  def addRelatedKeyJoin(self, column, right_side, condition):
    """ Wraps the current table_definition in the left-side of a new
    join.

    A LeftJoin is used when the column is not in inner_join_list and
    either is in left_join_list, or implicit joins are not forced and the
    column is registered in the default group. An InnerJoin is used
    otherwise.

    column: column for which the join is added.
    right_side: right side of the new join.
    condition: condition of the new join.
    """
    # XXX: to fix TestERP5Catalog.test_52_QueryAndTableAlias, create
    # here a list of joins and try to merge each new entry into one of
    # the pre-existing entries by comparing their right-sides.
    #
    # XXX 2: This is the place were we could do ordering of inner and left
    # joins so as to get better performance. For instance, a quick win is to
    # add all inner-joins first, and all left-joins later. We could also decide
    # on the order of left-joins based on the order of self.left_join_list or
    # even a catalog property/configuration/script.
    #
    # XXX 3: This is also the place where we could check if explicit
    # table aliases should cause some of these table definitions to be
    # collapsed into others.
    # Do not call this from within the assert statement: asserts are removed
    # when python runs with -O, and the call has a required side effect.
    is_table_definition_set = self._setMinimalTableDefinition()
    assert is_table_definition_set
    use_left_join = column not in self.inner_join_list and (
      column in self.left_join_list or (
        not self.implicit_join and
        column in self.registry.get(DEFAULT_GROUP_ID, ())
      )
    )
    Join = LeftJoin if use_left_join else InnerJoin
    join_definition = Join(self.table_definition, right_side,
                           condition=condition)
    self.table_definition = join_definition

  # def getFinalTableDefinition(self):
668
  #   self._calculateMissingJoins()
669 670
  #   return self.getTableDefinition()

671 672
  def _calculateMissingJoins(self):
    """
      Join every table of join_table_map with the catalog table on uid, by
      wrapping the inner table definition in one more join per table.
    """
    left_join_set = set(self.left_join_list)
    self._setMinimalTableDefinition()
    catalog_table_alias = self.getCatalogTableAlias()
    inner_definition = self._inner_table_definition
    for (group, table_name), column_set in self.join_table_map.items():
      # if any of the columns for this implicit join was requested as a
      # left-join, then all columns will be subject to a left-join.
      # XXX What if one of the columns was an actual query, as opposed to a
      # sort column or select_dict? This would cause results in the main
      # catalog that don't match the query to be present as well. We expect
      # the user which passes a left_join_list to know what he is doing.
      Join = InnerJoin if column_set.isdisjoint(left_join_set) else LeftJoin
      table_alias = self.getTableAlias(table_name, group=group)
      table_alias_def = self.makeTableAliasDefinition(table_name, table_alias)
      # XXX ColumnMap shouldn't have SQL knowledge
      uid_condition = '`%s`.`uid` = `%s`.`uid`' % (table_alias,
                                                   catalog_table_alias)
      # XXX: perhaps refactor some of the code below to do:
      # self._inner_table_definition.addInnerJoin(TableAlias(...),
      #                                           condition=(...))
      inner_definition.replace(
        Join(inner_definition.table_definition,
             table_alias_def,
             condition=uid_condition))

700 701
verifyClass(IColumnMap, ColumnMap)