extension.erp5.JupyterCompile.py 42.9 KB
Newer Older
1
# -*- coding: utf-8 -*-
2 3 4
from matplotlib.figure import Figure
from IPython.core.display import DisplayObject
from IPython.lib.display import IFrame
5
from cStringIO import StringIO
6 7
from erp5.portal_type import Image
from types import ModuleType
8
from ZODB.serialize import ObjectWriter
9
import cPickle
10
import sys
11
import traceback
12
import ast
13
import base64
14
import json
15
import transaction
16
import Acquisition
17
import astor
18
import importlib
19
from Products.ERP5Type.Log import log
20

21 22 23 24 25 26 27 28 29 30
# Display matplotlib figure automatically like
# the original python kernel
import matplotlib
import matplotlib.pyplot as plt
from IPython.core.pylabtools import print_figure
from IPython.core.display import _pngxy
from ipykernel.jsonutil import json_clean, encode_images
import threading
display_data_wrapper_lock = threading.Lock()

31 32 33 34
# Well known unserializable types
from Record import Record
well_known_unserializable_type_tuple = (ModuleType, Record,)

35 36
def Base_executeJupyter(self, python_expression=None, reference=None,
                        title=None, request_reference=False, **kw):
  """
    Entry point called by the Jupyter frontend to run code in a Data Notebook.

    python_expression -- code of the cell to execute ('' is used when empty).
    reference -- reference of the Data Notebook to use; a new Data Notebook is
                 created when no existing one matches.
    title -- title given to the Data Notebook when it has to be created.
    request_reference -- True or the string 'True' to only list the existing
                         notebooks instead of executing code.

    Returns, depending on the branch taken:
      - an error message string when the user is not authorized or when no
        reference is given,
      - a list of {'reference': ..., 'title': ...} dicts when
        request_reference is set,
      - otherwise the execution result serialized as a JSON string.
  """
  # Check permissions for current user and display message to non-authorized user
  if not self.Base_checkPermission('portal_components', 'Manage Portal'):
    return "You are not authorized to access the script"

  # The flag may arrive either as a real boolean or as the string sent by the
  # frontend ('True' / 'False'). Anything else is considered False.
  request_reference = request_reference is True or request_reference == 'True'

  # Return python dictionary with title and reference of all notebooks
  # for request_reference=True
  if request_reference:
    data_notebook_list = self.portal_catalog(portal_type='Data Notebook')
    notebook_detail_list = [{'reference': obj.getReference(),
                             'title': obj.getTitle()} for obj in data_notebook_list]
    return notebook_detail_list

  if not reference:
    message = "Please set or use reference for the notebook you want to use"
    return message

  # Take python_expression as '' for empty code from jupyter frontend
  if not python_expression:
    python_expression = ''

  # Get Data Notebook with the specific reference
  data_notebook = self.portal_catalog.getResultValue(
                         portal_type='Data Notebook',
                         reference=reference)

  # Create new Data Notebook if reference doesn't match with any from existing ones
  if not data_notebook:
    notebook_module = self.getDefaultModule(portal_type='Data Notebook')
    data_notebook = notebook_module.DataNotebookModule_addDataNotebook(
                                      title=title,
                                      reference=reference,
                                      batch_mode=True)

  # Add new Data Notebook Line to the Data Notebook
  data_notebook_line = data_notebook.DataNotebook_addDataNotebookLine(
                                       notebook_code=python_expression,
                                       batch_mode=True)

  # Gets the context associated to the data notebook being used
  old_notebook_context = data_notebook.getNotebookContext()
  if not old_notebook_context:
    old_notebook_context = self.Base_createNotebookContext()

  # Pass all to code Base_runJupyter external function which would execute the code
  # and returns a dict of result
  final_result = displayDataWrapper(
    lambda: Base_runJupyterCode(self, python_expression, old_notebook_context))

  new_notebook_context = final_result['notebook_context']

  result = {
    u'code_result': final_result['result_string'],
    u'print_result': final_result['print_result'],
    u'displayhook_result': final_result['displayhook_result'],
    u'ename': final_result['ename'],
    u'evalue': final_result['evalue'],
    u'traceback': final_result['traceback'],
    u'status': final_result['status'],
    u'mime_type': final_result['mime_type'],
    u'extra_data_list': final_result['extra_data_list'],
  }

  # Updates the context in the notebook with the resulting context of code
  # execution.
  data_notebook.setNotebookContext(new_notebook_context)

  # We try to commit, but the notebook context property may have variables that
  # cannot be serialized into the ZODB and couldn't be captured by our code yet.
  # In this case we abort the transaction and warn the user about it. Unfortunately,
  # the exception raised when this happens doesn't help to know exactly which
  # object caused the problem, so we cannot tell the user what to fix.
  try:
    transaction.commit()
  except transaction.interfaces.TransactionError as e:
    transaction.abort()
    exception_dict = getErrorMessageForException(self, e, new_notebook_context)
    result.update(exception_dict)
    return json.dumps(result)

  # Catch exception while serializing the result to be passed to jupyter frontend
  # and in case of error put code_result as None and status as 'error' which would
  # be shown by Jupyter frontend
  try:
    serialized_result = json.dumps(result)
  except UnicodeDecodeError:
    result = {
      u'code_result': None,
      u'print_result': None,
      u'displayhook_result': None,
      u'ename': u'UnicodeDecodeError',
      u'evalue': None,
      u'traceback': None,
      u'status': u'error',
      u'mime_type': result['mime_type']}
    serialized_result = json.dumps(result)

  data_notebook_line.edit(
    notebook_code_result=result['code_result'],
    mime_type=result['mime_type'])

  return serialized_result


143 144 145 146 147 148 149 150 151
def mergeTracebackListIntoResultDict(result_dict, error_result_dict_list):
  """
    Fold the errors collected in error_result_dict_list into result_dict.

    For every error dict, its 'traceback' value is appended (as a single item)
    to result_dict['traceback'] and its 'status' overrides result_dict['status'].
    result_dict is modified in place and returned; it is returned untouched
    when error_result_dict_list is empty.
  """
  if not error_result_dict_list:
    return result_dict
  # A result without any traceback yet gets a fresh list to collect them.
  if result_dict['traceback'] is None:
    result_dict['traceback'] = []
  for error_entry in error_result_dict_list:
    result_dict['traceback'].append(error_entry['traceback'])
    result_dict['status'] = error_entry['status']
  return result_dict

152 153 154 155 156 157 158

def matplotlib_pre_run():
  """
    Prepare matplotlib before running a code cell: turn interactive mode on,
    apply the rc settings listed below and clear the current figure.
  """
  matplotlib.interactive(True)
  rc_item_tuple = (
    ('figure.figsize', (6.0, 4.0)),
    ('figure.facecolor', (1, 1, 1, 0)),
    ('figure.edgecolor', (1, 1, 1, 0)),
    ('font.size', 10),
    ('figure.dpi', 72),
    ('figure.subplot.bottom', .125),
  )
  for rc_name, rc_value in rc_item_tuple:
    matplotlib.rcParams[rc_name] = rc_value
  plt.gcf().clear()

def matplotlib_post_run(data_list):
  """
    Render the current matplotlib figure as PNG and, when something was
    rendered, append the corresponding display data dict to data_list.
    The current figure is cleared in every case.
  """
  # Always try to get the current figure.
  # This is not efficient, but we can support any libraries
  # that use matplotlib.
  current_figure = plt.gcf()
  image_bytes = print_figure(current_figure, fmt='png')
  current_figure.clear()
  if image_bytes is None:
    return
  image_width, image_height = _pngxy(image_bytes)
  display_entry = dict(
    data=encode_images({'image/png': image_bytes}),
    metadata={'image/png': dict(width=image_width, height=image_height)})
  data_list.append(json_clean(display_entry))

class Displayhook(object):
  """
    Replacement for sys.displayhook which stores the last displayed value
    instead of printing it.

    After a run, `result` is None when nothing was displayed, the repr()
    string of the value, or a {'data': ..., 'metadata': ...} dict when the
    value provides a `_repr_html_` attribute.
  """

  def __init__(self):
    # Defined up front so that reading `result` or calling post_run() before
    # any pre_run() does not raise AttributeError.
    self.result = None
    self.old_hook = None

  def hook(self, value):
    """Record `value` as the result to display; None values are ignored."""
    if value is None:
      return
    if getattr(value, '_repr_html_', None) is not None:
      self.result = {'data': {'text/html': value._repr_html_()}, 'metadata': {}}
    else:
      self.result = repr(value)

  def pre_run(self):
    """Install this hook as sys.displayhook and reset the stored result."""
    self.old_hook = sys.displayhook
    sys.displayhook = self.hook
    self.result = None

  def post_run(self):
    """Restore the sys.displayhook saved by pre_run(), if any."""
    if self.old_hook is not None:
      sys.displayhook = self.old_hook


displayhook = Displayhook()

def displayDataWrapper(function):
  """
    Call `function` with the custom display hook installed and matplotlib
    prepared, then collect the figure produced during the call.

    function -- callable without arguments returning a dict.

    Returns the dict returned by `function`, with its 'extra_data_list' key
    set to the list holding the data already present under that key plus the
    display data added by matplotlib_post_run.
    The whole sequence runs under display_data_wrapper_lock.
  """
  with display_data_wrapper_lock:
    displayhook.pre_run()
    try:
      # Inside the try block so that sys.displayhook is restored even when
      # the matplotlib preparation itself fails.
      matplotlib_pre_run()
      extra_data_list = []
      try:
        result = function()
        extra_data_list = result.get('extra_data_list', [])
      finally:
        matplotlib_post_run(extra_data_list)
    finally:
      displayhook.post_run()
  result['extra_data_list'] = extra_data_list
  return result

211
def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
  """
    Function to execute jupyter code and update the context dictionary.
    Code execution depends on 'interactivity', a.k.a , if the ast.node object has
    ast.Expr instance (valid for expressions) or not.

    old_notebook_context should contain both variables dict and setup functions.
    Here, setup dict is {key: value} pair of setup function names and another dict,
    which contains the function's alias and code, as string. These functions
    should be executed before `jupyter_code` to properly create the required
    environment.

    For example:
    old_notebook_context =  {
      'setup': {
        'numpy setup': {
          'func_name': 'numpy_setup_function',
          'code': ...
        }
      },
      'variables': {
        'my_variable': 1
      }
    }

    The behaviour would be similar to that of jupyter notebook:-
    ( https://github.com/ipython/ipython/blob/master/IPython/core/interactiveshell.py#L2954 )
    Example:

      code1 = '''
      23
      print 23 #Last node not an expression, interactivity = 'none'
      '''
      out1 = '23'

      code2 = '''
      123
      12 #Last node an expression, interactivity = 'last'
      '''
      out2 = '12'

    On normal completion, returns a dict with the keys 'result_string',
    'print_result', 'displayhook_result', 'notebook_context', 'status',
    'mime_type', 'evalue', 'ename' and 'traceback'. When parsing or executing
    the code fails, the dict built by getErrorMessageForException is returned
    instead. old_notebook_context is updated in place.
  """
  mime_type = 'text/plain'
  status = u'ok'
  ename, evalue, tb_list = None, None, None

  # Other way would be to use all the globals variables instead of just an empty
  # dictionary, but that might hamper the speed of exec or eval.
  # Something like -- user_context = globals(); user_context['context'] = self;
  user_context = {}
  output = ''

  # Saving the initial globals dict so as to compare it after code execution
  globals_dict = globals()
  notebook_context = old_notebook_context

  inject_variable_dict = {}
  current_var_dict = {}
  current_setup_dict = {}
  setup_error_return_dict_list = []

  # Execute only if jupyter_code is not empty
  if jupyter_code:
    # Create ast parse tree
    try:
      ast_node = ast.parse(jupyter_code)
    except Exception as e:
      # It's not necessary to abort the current transaction here 'cause the
      # user's code wasn't executed at all yet.
      return getErrorMessageForException(self, e, notebook_context)

    # Fixing "normal" imports and detecting environment object usage
    import_fixer = ImportFixer()
    print_fixer = PrintFixer()
    environment_collector = EnvironmentParser()
    ast_node = import_fixer.visit(ast_node)
    ast_node = print_fixer.visit(ast_node)
    ast.fix_missing_locations(ast_node)

    # The collector also raises errors when environment.define and undefine
    # calls are made incorrectly, so we need to capture them to propagate
    # to Jupyter for rendering.
    try:
      ast_node = environment_collector.visit(ast_node)
    except (EnvironmentDefinitionError, EnvironmentUndefineError) as e:
      transaction.abort()
      return getErrorMessageForException(self, e, notebook_context)

    # Get the node list from the parsed tree
    nodelist = ast_node.body

    # Handle case for empty nodelist(in case of comments as jupyter_code)
    if nodelist:
      # If the last node is instance of ast.Expr, it is run with 'single' mode
      # (interactivity 'last') so that its value goes through the display
      # hook. All the other nodes are run with 'exec' mode.
      if isinstance(nodelist[-1], ast.Expr):
        to_run_exec, to_run_interactive = nodelist[:-1], nodelist[-1:]
      else:
        to_run_exec, to_run_interactive = nodelist, []

      # Variables used at the display hook to get the proper form to display
      # the last returning variable of any code cell.
      display_data = {'result': '',
                      'mime_type': None}

      # This is where one part of the  display magic happens. We create an
      # instance of ProcessorList and add each of the built-in processors.
      # The classes which each of them are responsible for rendering are defined
      # in the classes themselves.
      # The customized display hook will automatically use the processor
      # of the matching class to decide how the object should be displayed.
      # NOTE(review): IPythonDisplayObjectProcessor is registered twice, as in
      # the original code -- confirm whether the second call is intentional.
      processor_list = ProcessorList()
      processor_list.addProcessor(IPythonDisplayObjectProcessor)
      processor_list.addProcessor(MatplotlibFigureProcessor)
      processor_list.addProcessor(ERP5ImageProcessor)
      processor_list.addProcessor(IPythonDisplayObjectProcessor)

      # Putting necessary variables in the `exec` calls context and storing
      inject_variable_dict = {
        'context': self,
        'environment': Environment(),
        '_display_data': display_data,
        '_processor_list': processor_list,
        '_volatile_variable_list': [],
        '_print': CustomPrint()}
      user_context.update(inject_variable_dict)
      user_context.update(notebook_context['variables'])

      # Getting the environment setup defined in the current code cell
      current_setup_dict = environment_collector.getEnvironmentSetupDict()
      current_var_dict = environment_collector.getEnvironmentVarDict()

      # Removing old setup from the setup functions
      removed_setup_message_list = []
      for func_alias in environment_collector.getEnvironmentRemoveList():
        if func_alias not in notebook_context['setup']:
          transaction.abort()
          undefine_error_message = "EnvironmentUndefineError: Trying to remove non existing function/variable from environment: '%s'\n" % func_alias
          return {
            'result_string': undefine_error_message,
            'print_result': {"data": {"text/plain": undefine_error_message}, "metadata": {}},
            'displayhook_result': None,
            'notebook_context': notebook_context,
            'status': 'ok',
            'mime_type': 'text/plain',
            'evalue': None,
            'ename': None,
            'traceback': None}
        func_name = notebook_context['setup'][func_alias]['func_name']
        del notebook_context['setup'][func_alias]
        user_context.pop(func_alias, None)
        removed_setup_message = (
          "%s (%s) was removed from the setup list. "
          "Variables it may have added to the context and are not pickleable "
          "were automatically removed.\n"
        ) % (func_name, func_alias)
        removed_setup_message_list.append(removed_setup_message)

      # Removing all the setup functions if user call environment.clearAll()
      if environment_collector.clearAll():
        for key in list(notebook_context['setup'].keys()):
          del notebook_context['setup'][key]

      # Running all the setup functions that we got
      for key, value in notebook_context['setup'].items():
        try:
          code = compile(value['code'], '<string>', 'exec')
          exec(code, user_context, user_context)
        # An error happened, so we show the user the stacktrace along with a
        # note that the exception happened in a setup function's code.
        except Exception as e:
          if value['func_name'] in user_context:
            del user_context[value['func_name']]
          error_return_dict = getErrorMessageForException(self, e, notebook_context)
          additional_information = "An error happened when trying to run the one of your setup functions:"
          error_return_dict['traceback'].insert(0, additional_information)
          setup_error_return_dict_list.append(error_return_dict)

      # Iterating over environment.define calls captured by the environment collector
      # that are functions and saving them as setup functions.
      # The generated code only touches `_result` as a dict inside the guard,
      # so a setup function returning None (or anything which is not a dict)
      # does not fail on `_result.keys()`.
      for func_name, data in current_setup_dict.items():
        setup_string = (
          "%s\n"
          "_result = %s()\n"
          "if _result and isinstance(_result, dict):\n"
          "    globals().update(_result)\n"
          "    _volatile_variable_list += _result.keys()\n"
          "del %s, _result\n"
        ) % (data['code'], func_name, func_name)
        notebook_context['setup'][data['alias']] = {
          "func_name": func_name,
          "code": setup_string}

      # Iterating over environment.define calls captured by the environment collector
      # that are simple variables and saving them in the setup.
      for variable, value in current_var_dict.items():
        setup_string = "%s = %s\n" % (variable, repr(value))
        notebook_context['setup'][variable] = {
          'func_name': variable,
          'code': setup_string}
        # append, not +=: extending a list with a string would add each of
        # its characters as a separate entry.
        user_context['_volatile_variable_list'].append(variable)

      if environment_collector.showEnvironmentSetup():
        inject_variable_dict['_print'].write("%s\n" % str(notebook_context['setup']))

      # Execute the nodes with 'exec' mode
      for node in to_run_exec:
        mod = ast.Module([node])
        code = compile(mod, '<string>', "exec")
        try:
          exec(code, user_context, user_context)
        except Exception as e:
          # Abort the current transaction. As a consequence, the notebook lines
          # are not added if an exception occurs.
          transaction.abort()
          return mergeTracebackListIntoResultDict(getErrorMessageForException(self, e, notebook_context),
                                                  setup_error_return_dict_list)

      # Execute the interactive nodes with 'single' mode
      for node in to_run_interactive:
        mod = ast.Interactive([node])
        try:
          code = compile(mod, '<string>', 'single')
          exec(code, user_context, user_context)
        except Exception as e:
          # Abort the current transaction. As a consequence, the notebook lines
          # are not added if an exception occurs.
          transaction.abort()
          return mergeTracebackListIntoResultDict(getErrorMessageForException(self, e, notebook_context),
                                                  setup_error_return_dict_list)

      mime_type = display_data['mime_type'] or mime_type
      inject_variable_dict['_print'].write("\n".join(removed_setup_message_list) + display_data['result'])

    # Saves a list of all the variables we injected into the user context and
    # shall be deleted before saving the context.
    volatile_variable_list = (
      list(current_setup_dict.keys())
      + list(inject_variable_dict.keys())
      + list(user_context.get('_volatile_variable_list', [])))
    volatile_variable_list.append('__builtins__')

    # Iterating over a copy because unserializable entries are removed from
    # user_context inside the loop.
    for key, val in list(user_context.items()):
      if (key not in globals_dict
          and not isinstance(val, well_known_unserializable_type_tuple)
          and key not in volatile_variable_list):
        if canSerialize(val):
          notebook_context['variables'][key] = val
        else:
          del user_context[key]
          message = (
            "Cannot serialize the variable named %s whose value is %s, "
            "thus it will not be stored in the context. "
            "You should move it's definition to a function and "
            "use the environment object to load it.\n"
          ) % (key, val)
          inject_variable_dict['_print'].write(message)

    # Deleting from the variable storage the keys that are not in the user
    # context anymore (i.e., variables that are deleted by the user).
    # This is only meaningful when the stored variables were loaded into
    # user_context, which happens only for a non empty nodelist. Otherwise
    # (e.g. a cell made only of comments) every stored variable would be lost.
    if nodelist:
      for key in list(notebook_context['variables'].keys()):
        if key not in user_context:
          del notebook_context['variables'][key]

    if inject_variable_dict.get('_print') is not None:
      output = inject_variable_dict['_print'].getCapturedOutputString()

  displayhook_result = {"data": {}, "metadata": {}}
  if displayhook.result is not None:
    if isinstance(displayhook.result, str):
      displayhook_result["data"]["text/plain"] = displayhook.result
    elif isinstance(displayhook.result, dict):
      displayhook_result = displayhook.result
  result = {
    'result_string': output,
    'print_result': {"data": {"text/plain": output}, "metadata": {}},
    'displayhook_result': displayhook_result,
    'notebook_context': notebook_context,
    'status': status,
    'mime_type': mime_type,
    'evalue': evalue,
    'ename': ename,
    'traceback': tb_list}
  return mergeTracebackListIntoResultDict(result, setup_error_return_dict_list)
506 507


508 509 510 511 512 513 514 515
class EnvironmentUndefineError(TypeError):
  """Error raised for an incorrect `environment.undefine` call."""


class EnvironmentDefinitionError(TypeError):
  """Error raised for an incorrect `environment.define` call."""


516
def canSerialize(obj):
  """
    Tell whether `obj` can be stored in the notebook context.

    Containers (list, tuple, dict, set, frozenset) are accepted only if all
    their elements (keys and values for dicts) are accepted. Objects having
    both `__getstate__` and `_p_jar` are checked with ZODB's ObjectWriter.
    Anything else is checked with a cPickle dump / load round trip.

    Returns True or False.
  """
  container_type_tuple = (list, tuple, dict, set, frozenset)
  # ids of the containers already entered. A container reached a second time
  # (shared or self-referencing) is not inspected again, which keeps the
  # recursion finite on cyclic structures.
  visited_container_id_set = set()

  def check(item):
    # if object is a container, we need to check its elements for presence of
    # objects that cannot be put inside the zodb
    if isinstance(item, container_type_tuple):
      if id(item) in visited_container_id_set:
        return True
      visited_container_id_set.add(id(item))
      if isinstance(item, dict):
        return all(check(key) and check(value) for key, value in item.items())
      return all(check(element) for element in item)

    # if item implements __getstate__ and is a persistent object,
    # ZODB.serialize can check if we can store it
    if hasattr(item, '__getstate__') and hasattr(item, '_p_jar'):
      # Need to unwrap the variable, otherwise we get a TypeError, because
      # objects cannot be pickled while inside an acquisition wrapper.
      unwrapped_obj = Acquisition.aq_base(item)
      try:
        writer = ObjectWriter(unwrapped_obj)
      except:
        # Ignore any exceptions, otherwise Jupyter becomes permanent unusable state.
        return False
      for sub_object in writer:
        try:
          writer.serialize(sub_object)
        # Because writer.serialize relies on the implementation of
        # __getstate__ of the object, all errors can happen, so the
        # "except all" is necessary here.
        except:
          return False
      return True

    # If cannot serialize object with ZODB.serialize, try with cPickle
    # Only a dump of the object is not enough. Dumping and trying to
    # load it will properly raise errors in all possible situations,
    # for example: if the user defines a dict with an object of a class
    # that he created the dump will still work, but the load will fail.
    try:
      cPickle.loads(cPickle.dumps(item))
    except (cPickle.PicklingError, TypeError, NameError, AttributeError):
      return False
    return True

  return check(obj)
571
  
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
  
class CustomPrint(object):
  """
    Minimal file-like object collecting everything written to it, so that
    the output can be retrieved later as a single string.
  """

  def __init__(self):
    self.captured_output_list = []

  def write(self, *args):
    """Store every given argument, in order, in the captured output list."""
    self.captured_output_list.extend(args)

  def getCapturedOutputString(self):
    """Return all the captured pieces concatenated."""
    empty_separator = ''
    return empty_separator.join(self.captured_output_list)
    

class PrintFixer(ast.NodeTransformer):
  """
    AST transformer setting the destination of every print statement to the
    variable named `_print`.
  """

  def visit_Print(self, node):
    node.dest = ast.Name(id="_print", ctx=ast.Load())
    return node
  
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636

class EnvironmentParser(ast.NodeTransformer):
  """
    EnvironmentParser class is an AST transformer that walks in the abstract
    code syntax tree to find calls to `define`, `undefine`, `clearAll` and
    `showSetup` on a variable named `environment`.

    The `define` call should receive a function and an alias, and the
    function's code is stored as string in `self.environment_setup_dict`.
    If kw args are provided, the variables definition will be stored in
    `self.environment_var_dict` instead.

    The `undefine` call adds the given alias to `self.environment_remove_list`.

    Incorrect `define` calls raise EnvironmentDefinitionError and incorrect
    `undefine` calls raise EnvironmentUndefineError. The tree itself is
    returned unchanged.
  """

  def __init__(self):
    self.environment_setup_dict = {}
    self.environment_var_dict = {}
    self.environment_remove_list = []
    self.function_dict = {}
    self.environment_clear_all = False
    self.show_environment_setup = False

  def visit_FunctionDef(self, node):
    """
      Stores all the function nodes in a dictionary to be accessed later when
      we detect they are used as parameters for an `environment.define` call.
    """
    self.function_dict[node.name] = node
    return node

  def visit_Expr(self, node):
    """
      Visits expressions and, when they are a call to one of the tracked
      methods of `environment`, records what was requested.
    """
    value = node.value
    if not isinstance(value, ast.Call):
      return node
    function = value.func
    if not isinstance(function, ast.Attribute):
      return node
    attribute = function.value
    if not isinstance(attribute, ast.Name) or attribute.id != 'environment':
      return node

    method_name = function.attr
    if method_name == 'define':
      # Keyword arguments take precedence: they define simple variables.
      if value.keywords:
        self._collectVariableDefinition(value)
      else:
        self._collectFunctionDefinition(value)
    elif method_name == 'undefine':
      self._collectUndefine(value)
    elif method_name == 'clearAll':
      self.environment_clear_all = True
    elif method_name == 'showSetup':
      self.show_environment_setup = True
    return node

  def _collectFunctionDefinition(self, call_node):
    """Handles `environment.define(function, 'alias')`."""
    if not len(call_node.args) == 2:
      raise EnvironmentDefinitionError('environment.define calls receive 2 arguments')

    self._ensureType(
      obj=call_node.args[0],
      klass=ast.Name,
      error_message='Type mismatch. environment.define receives a function as first argument.'
    )
    self._ensureType(
      obj=call_node.args[1],
      klass=ast.Str,
      error_message='Type mismatch. environment.define receives a string as second argument.'
    )

    func_name = call_node.args[0].id
    func_alias = call_node.args[1].s
    # Only the functions already visited in the parsed code are known. Report
    # an unknown name with the error class the callers handle instead of
    # letting a KeyError escape.
    if func_name not in self.function_dict:
      raise EnvironmentDefinitionError(
        "environment.define received an unknown function '%s'. "
        "The function must be defined before the call, in the same code." % func_name)
    function_string = astor.to_source(self.function_dict[func_name])
    self.environment_setup_dict[func_name] = {
      "code": function_string,
      "alias": func_alias
    }

  def _collectVariableDefinition(self, call_node):
    """Handles `environment.define(name=value, ...)`."""
    # The value can be a number, string or name. We need to handle
    # them separately. This dict trick was used to avoid the very
    # ugly if.
    node_value_dict = {
      ast.Num: lambda node: str(node.n),
      ast.Str: lambda node: node.s,
      ast.Name: lambda node: node.id
    }
    for keyword in call_node.keywords:
      arg_value_node = keyword.value
      try:
        get_value = node_value_dict[type(arg_value_node)]
      except KeyError:
        raise EnvironmentDefinitionError(
          'Type mismatch. environment.define receives only numbers, strings '
          'or names as keyword argument values.')
      self.environment_var_dict[keyword.arg] = get_value(arg_value_node)

  def _collectUndefine(self, call_node):
    """Handles `environment.undefine('alias')`."""
    undefine_error_message = 'Type mismatch. environment.undefine receives only a string as argument.'
    # A call without positional argument is reported like a wrong type,
    # instead of failing with an IndexError.
    if not call_node.args:
      raise EnvironmentUndefineError(undefine_error_message)
    self._ensureType(
      obj=call_node.args[0],
      klass=ast.Str,
      call_type='undefine',
      error_message=undefine_error_message
    )
    self.environment_remove_list.append(call_node.args[0].s)

  def _ensureType(self, obj=None, klass=None, error_message=None, call_type='define'):
    """
      Raises EnvironmentUndefineError (call_type 'undefine') or
      EnvironmentDefinitionError (any other call_type) with error_message
      when obj is not an instance of klass.
    """
    if not isinstance(obj, klass):
      if call_type == 'undefine':
        error_class = EnvironmentUndefineError
      else:
        error_class = EnvironmentDefinitionError
      raise error_class(error_message)

  def clearAll(self):
    return self.environment_clear_all

  def showEnvironmentSetup(self):
    return self.show_environment_setup

  def getEnvironmentSetupDict(self):
    return self.environment_setup_dict

  def getEnvironmentVarDict(self):
    return self.environment_var_dict

  def getEnvironmentRemoveList(self):
    return self.environment_remove_list


class Environment(object):
  """
   Dumb object used to receive call on an object named `environment` inside
   user context. These calls will be tracked by the EnvironmentParser calls.
   Every method accepts its arguments and does nothing.
  """

  def define(self, *args, **kwargs):
    return None

  def undefine(self, name):
    return None

  def clearAll(self):
    return None

  def showSetup(self):
    return None
  

class ImportFixer(ast.NodeTransformer):
  """
   The ImportFixer class is responsible for fixing "normal" imports that users
   might try to execute.

   It will automatically replace them with the proper usage of the environment
   object using AST manipulation.
  """

  def __init__(self):
    # Maps every name bound by an import found inside a function definition
    # to the name of that function.
    self.import_func_dict = {}

  def visit_FunctionDef(self, node):
    """
      Processes function definition nodes. Every name bound by an import
      statement located anywhere inside the function (`import x` as well as
      `from x import y`, including those nested in `if`/`try`/... blocks) is
      recorded, because such imports do not affect the outer user context and
      thus do not add any un-pickleable variable there. Rewriting them would
      move the import into a setup function and leave the name unbound inside
      the user's function.
    """
    for child in ast.walk(node):
      if isinstance(child, (ast.Import, ast.ImportFrom)):
        for alias in child.names:
          self.import_func_dict[self._getBoundName(alias)] = node.name
    return self.generic_visit(node)

  def visit_ImportFrom(self, node):
    """
     Fixes `from y import x` statements in the same way `import y` is fixed.
    """
    return self.visit_Import(node)

  def visit_Import(self, node):
    """
    This function replaces `normal` imports by three AST nodes: a setup
    function which performs the import and returns the imported names in a
    dict, an `environment.define` call registering that function, and a
    warning printed to the user.

    Imports whose names were all recorded by `visit_FunctionDef` are returned
    unchanged. The import is executed once beforehand, so importing a
    non existent module raises immediately.
    """
    alias_list = node.names

    if getattr(node, "module", None) is not None:
      # case when 'from <module_name> import <something>'
      root_module_name = node.module
      if alias_list[0].name == '*':
        # case when "from <module_name> import *"
        module_names = self._getStarImportNames(node.module)
        test_import_string = "from %s import *" % node.module
        # Dots are not allowed in the generated function name.
        result_name = "%s_ALL" % node.module.replace('.', '_')
      else:
        # case when "from <module_name> import a as b, c as d, ..."
        module_names = [self._getBoundName(alias) for alias in alias_list]
        test_import_string = "from %s import %s" % (
          node.module, self._formatAliasList(alias_list))
        result_name = '_'.join(module_names)
    else:
      # case when "import <module_name> [as <name>], ..."
      module_names = [self._getBoundName(alias) for alias in alias_list]
      test_import_string = "import %s" % self._formatAliasList(alias_list)
      name_part_list = []
      for alias in alias_list:
        # `import os.path` must give `os_path_setup`, not `os.path_setup`.
        name_part_list.append(alias.asname or alias.name.replace('.', '_'))
      result_name = '_'.join(name_part_list)
      root_module_name = ', '.join([alias.name for alias in alias_list])

    final_module_names = []
    for name in module_names:
      if not self.import_func_dict.get(name):
        final_module_names.append(name)

    # A star import of a module without public names gives an empty list.
    if module_names:
      log("module_names[0]: " + module_names[0])
    log("result_name: " + result_name)

    if not final_module_names:
      return node

    # try to import module before it is added to environment
    # this way if user tries to import non existent module Exception
    # is immediately raised and doesn't block next Jupyter cell execution
    self._tryImport(test_import_string)

    empty_function = self.newEmptyFunction("%s_setup" % result_name)
    return_dict = self.newReturnDict(final_module_names)

    log(return_dict)

    empty_function.body = [node, return_dict]
    environment_set = self.newEnvironmentSetCall("%s_setup" % result_name)
    warning = self.newImportWarningCall(root_module_name, result_name)
    return [empty_function, environment_set, warning]

  def _getBoundName(self, alias):
    """
      Return the variable name an `ast.alias` binds: the `as` name when there
      is one, otherwise the first component of the (possibly dotted) name,
      since `import a.b` binds `a`.
    """
    if alias.asname:
      return alias.asname
    return alias.name.split('.')[0]

  def _formatAliasList(self, alias_list):
    """
      Return the source form ("a as b, c, ...") of a list of `ast.alias`.
    """
    part_list = []
    for alias in alias_list:
      if alias.asname:
        part_list.append("%s as %s" % (alias.name, alias.asname))
      else:
        part_list.append(alias.name)
    return ', '.join(part_list)

  def _getStarImportNames(self, module_name):
    """
      Return the names `from <module_name> import *` binds: the module's
      `__all__` when it defines one, otherwise every name of the module that
      does not start with an underscore.
    """
    module = importlib.import_module(module_name)
    exported_name_list = getattr(module, '__all__', None)
    if exported_name_list is not None:
      return list(exported_name_list)
    return [name for name in module.__dict__.keys()
            if not name.startswith('_')]

  def _tryImport(self, test_import_string):
    """
      Execute the import statement in a throw-away namespace, so that it can
      neither read nor overwrite the local variables of the transformer (an
      unqualified exec of `import node` would replace the AST node).
    """
    exec(test_import_string, {})

  def newEmptyFunction(self, func_name):
    """
      Return an ast.FunctionDef object representing a function with name
      `func_name` and an empty body.
    """
    func_body = "def %s(): pass" % func_name
    return ast.parse(func_body).body[0]

  def newReturnDict(self, module_names):
    """
      Return an ast.Return node returning a dict with, for every name in
      `module_names`, the key `'name'` (as string) mapped to the variable
      `name` (as expression).
    """
    item_list = ["'%s': %s" % (name, name) for name in module_names]
    return_dict = "return {%s}" % ", ".join(item_list)
    return ast.parse(return_dict).body[0]

  def newEnvironmentSetCall(self, func_name):
    """
      Return an ast.Expr representing an `environment.define` call receiving
      `func_name` (as an expression) and `'func_name'` (as string).
    """
    code_string = "environment.define(%s, '%s')" % (func_name, func_name)
    tree = ast.parse(code_string)
    return tree.body[0]

  def newImportWarningCall(self, module_name, function_name):
    """
      Return the AST node of a print statement with a warning to an
      user about the import of a module named `module_name` and telling him
      the name of the generated setup function.

      The generated code uses the Python 2 print statement.
    """
    warning = ("print '"
               "WARNING: Your imported from the module %s without "
               "using the environment object, which is not recomended. "
               "Your import was automatically converted to use such method."
               "The setup function was named as: %s_setup.\\n"
               "'") % (module_name, function_name)
    tree = ast.parse(warning)
    return tree.body[0]

  
918 919 920 921 922 923 924 925 926 927 928 929
def renderAsHtml(self, renderable_object):
  '''
    renderAsHtml will render its parameter as HTML by using the matching 
    display processor for that class. Some processors can be found in this
    file. 

    The rendered result is written to the `_print` object of the calling
    kernel frame and that frame's display mime type is set to 'text/html'.
    Nothing is returned.
  '''
  # Ugly frame hack to access the processor list defined in the body of the
  # kernel's code, where `exec` is called.
  #
  # At this point the stack should be, from top to the bottom:
  #
  #   5. ExternalMethod Patch call
  #   4. Base_runJupyterCode frame (where we want to change variable)
  #   3. exec call to run the user's code
  #   2. ExternalMethod Patch call through `context.Base_renderAsHtml` in the notebook
  #   1. renderAsHtml frame (where the function is)
  # 
  # So sys._getframe(3) is enough to get us up into the frame we want.
  # Do not wrap the call below in a helper: it would add one frame.
  #
  compile_jupyter_frame = sys._getframe(3)
  compile_jupyter_locals = compile_jupyter_frame.f_locals
  processor = compile_jupyter_locals['processor_list'].getProcessorFor(renderable_object)
  # The mime type returned by the processor is ignored.
  result, _ = processor(renderable_object).process()
  compile_jupyter_locals['inject_variable_dict']['_print'].write(result)
  compile_jupyter_locals['display_data']['mime_type'] = 'text/html'
943

944
def getErrorMessageForException(self, exception, notebook_context):
  '''
    getErrorMessageForException receives an Exception object and a context for
    code execution (notebook_context) and will return a dict as Jupyter
    requires for error rendering.

    The error value and the traceback are taken from the exception currently
    being handled (sys.exc_info), so this is meant to be called from inside
    an `except` block. The error name comes from the class of `exception`.
  '''
  value = sys.exc_info()[1]
  if value is None:
    # No exception is being handled: report the exception that was passed in
    # rather than the string 'None'.
    value = exception
  # format_exc() ends with a newline, so the last split item is empty.
  traceback_text = traceback.format_exc().split('\n')[:-1]
  return {
    'status': 'error',
    'result_string': None,
    'print_result': None,
    'displayhook_result': None,
    'notebook_context': notebook_context,
    'mime_type': 'text/plain',
    'evalue': str(value),
    'ename': exception.__class__.__name__,
    'traceback': traceback_text
  }

964
def createNotebookContext(self):
  """
  Function to create an empty notebook context: a new dict holding an empty
  'variables' dict and an empty 'setup' dict.
  """
  return {'variables': {}, 'setup': {}}
969

970 971 972 973 974 975 976
class ObjectProcessor(object):
  '''
    Basic object processor that stores the first parameter of the constructor
    in the `subject` attribute and stores the target classes and modules for
    that processor.
  '''
  # Classes handled by the processor; None when it targets no class.
  TARGET_CLASSES=None
  # Module names handled by the processor; None when it targets no module.
  TARGET_MODULES=None

  @classmethod
  def getTargetClasses(cls):
    '''Return the TARGET_CLASSES declared by the processor class.'''
    return cls.TARGET_CLASSES

  @classmethod
  def getTargetModules(cls):
    '''Return the TARGET_MODULES declared by the processor class.'''
    return cls.TARGET_MODULES

  def __init__(self, something):
    self.subject = something

class MatplotlibFigureProcessor(ObjectProcessor):
  '''
    MatplotlibFigureProcessor handles the rich display of 
    matplotlib.figure.Figure objects. It displays them using an img tag with
    the inline png image encoded as base64.
  '''
  TARGET_CLASSES=[Figure,]
  TARGET_MODULES=['matplotlib.pyplot',]

  def process(self):
    '''
      Save the subject as png into memory and return a tuple made of the
      img tag embedding it and the 'text/html' mime type.
    '''
    image_io = StringIO()
    self.subject.savefig(image_io, format='png')
    image_io.seek(0)
    return self._getImageHtml(image_io), 'text/html'

  def _getImageHtml(self, image_io):
    '''Return an img tag embedding the content of `image_io` as base64.'''
    return '<img src="data:image/png;base64,%s" /><br />' % base64.b64encode(image_io.getvalue())
    
class ERP5ImageProcessor(ObjectProcessor):
  '''
   ERP5ImageProcessor handles the rich display of ERP5's image_module object.
   It gets the image data and content type and use them to create a proper img
   tag.
  '''
  TARGET_CLASSES=[Image,]

  def process(self):
    '''
      Return a tuple made of the img tag embedding the subject's data as
      base64, with the subject's content type, and the 'text/html' mime type.
    '''
    figure_data = base64.b64encode(self.subject.getData())
    mime_type = self.subject.getContentType()
    return '<img src="data:%s;base64,%s" /><br />' % (mime_type, figure_data), 'text/html'
1020

1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
class IPythonDisplayObjectProcessor(ObjectProcessor):
  '''
    IPythonDisplayObjectProcessor handles the display of all objects from the
    IPython.display module, including: Audio, IFrame, YouTubeVideo, VimeoVideo, 
    ScribdDocument, FileLink, and FileLinks. 
    
    All these objects have the `_repr_html_` method, which is used by the class
    to render them.
  '''
  TARGET_CLASSES=[DisplayObject, IFrame]

  def process(self):
    '''
      Return a tuple made of the subject's `_repr_html_()` output followed by
      a line break, and the 'text/html' mime type.
    '''
    html_repr = self.subject._repr_html_()
    return html_repr + '<br />', 'text/html' 

class GenericProcessor(ObjectProcessor):
  '''
    Generic processor to render objects as string.
  '''

  def process(self):
    '''Return a tuple made of str(subject) and the 'text/plain' mime type.'''
    return str(self.subject), 'text/plain'
    
class ProcessorList(object):
  '''
    ProcessorList is responsible to store all the processors in a dict using
    the classes (or module names) they handle as the key. The direct
    subclasses of a target class that exist when the processor is added get
    the same processor as the class itself. This means that the order of
    adding processors is important, as some classes' processors may be
    overwritten in some situations.
    
    The `getProcessorFor` method uses `something.__class__` and not 
    `type(something)` because using the latter on objects returned by portal 
    catalog queries will return an AcquisitionWrapper type instead of the 
    object's real class.
  '''

  def __init__(self, default=GenericProcessor):
    self.processors = {}
    # Processor returned when no registered one matches. The argument used to
    # be ignored and GenericProcessor was always stored.
    self.default_processor = default

  def addProcessor(self, processor):
    '''
      Register `processor` for each of its target classes, for their direct
      subclasses, and for each of its target module names.
    '''
    classes = processor.getTargetClasses()
    modules = processor.getTargetModules()
    
    if classes:
      for klass in classes:
        self.processors[klass] = processor
        for subclass in klass.__subclasses__():
          self.processors[subclass] = processor
      
    if modules:
      for module in modules:
        self.processors[module] = processor
        
  def getProcessorFor(self, something):
    '''
      Return the processor registered for the class of `something`, or for
      its name when `something` is a module. Falls back to the default
      processor when nothing is registered.
    '''
    if not isinstance(something, ModuleType):
      return self.processors.get(something.__class__, self.default_processor)
    else:
      return self.processors.get(something.__name__, self.default_processor)
1081

1082

1083
def storeIFrame(self, html, key):
  """
  Store `html` under `key` in the `erp5_pivottable_frame_cache` of
  portal_caches. Always returns True.
  """
  self.portal_caches.erp5_pivottable_frame_cache.set(key, html)
  return True

1087

1088 1089 1090 1091 1092
# WARNING!
#
# This is a highly experimental PivotTableJs integration which does not follow
# ERP5 JavaScript standards and it will be refactored to use JIO and RenderJS.
#
1093
def erp5PivotTableUI(self, df):
  """
  Render `df` (an object providing `to_csv()`) as a PivotTable.js page.

  The CSV export is embedded in an HTML page, the page is stored with
  storeIFrame under the sha512 hex digest of its content, and an IPython
  IFrame pointing to `Base_displayPivotTableFrame?key=<digest>` on the host
  found in the request's HTTP_X_FORWARDED_HOST is returned.
  """
  from hashlib import sha512
  from xml.sax.saxutils import escape
  from IPython.display import IFrame
  template = """
  <!DOCTYPE html>
  <html>
    <head>
      <title>PivotTable.js</title>

      <!-- external libs from cdnjs -->
      <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.css">
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.11.4/jquery-ui.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery-csv/0.71/jquery.csv-0.71.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.js"></script>

      <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/pivot.min.css">
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/pivot.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/d3_renderers.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/c3_renderers.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/export_renderers.min.js"></script>

      <style>
        body {font-family: Verdana;}
        .node {
         border: solid 1px white;
         font: 10px sans-serif;
         line-height: 12px;
         overflow: hidden;
         position: absolute;
         text-indent: 2px;
        }
        .c3-line, .c3-focused {stroke-width: 3px !important;}
        .c3-bar {stroke: white !important; stroke-width: 1;}
        .c3 text { font-size: 12px; color: grey;}
        .tick line {stroke: white;}
        .c3-axis path {stroke: grey;}
        .c3-circle { opacity: 1 !important; }
      </style>
    </head>
    <body>
      <script type="text/javascript">
        $(function(){
          if(window.location != window.parent.location)
            $("<a>", {target:"_blank", href:""})
              .text("[pop out]").prependTo($("body"));

          $("#output").pivotUI( 
            $.csv.toArrays($("#output").text()), 
            { 
              renderers: $.extend(
                $.pivotUtilities.renderers, 
                $.pivotUtilities.c3_renderers, 
                $.pivotUtilities.d3_renderers,
                $.pivotUtilities.export_renderers
                ),
              hiddenAttributes: [""]
            }
          ).show();
         });
      </script>
      <div id="output" style="display: none;">%s</div>
    </body>
  </html>
  """
  # The CSV is read back by the page through jQuery's .text(), which decodes
  # entities, so escaping &, < and > keeps the data intact while preventing
  # cell content from being parsed as markup.
  html_string = template % escape(df.to_csv())
  key = sha512(html_string).hexdigest()
  storeIFrame(self, html_string, key)
  # Several proxies may be listed; the first entry is used as the host.
  iframe_host = self.REQUEST['HTTP_X_FORWARDED_HOST'].split(',')[0]
  url = "https://%s/erp5/Base_displayPivotTableFrame?key=%s" % (iframe_host, key)
  return IFrame(src=url, width='100%', height='500')