From 00eca6c51dcd69117379dc919258c4dadbb81d8f Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@nexedi.com>
Date: Wed, 11 Oct 2023 08:08:22 +0200
Subject: [PATCH] software/ors-amarisoft: Switch *.json.log to emit `data`
 field via JSON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ORS software-release emits many *.json.log files with information about current
state of eNB and radio units. Those files are used by code in slapos.toolbox to
implement corresponding promises. The files, it seems, try to match JSON Lines
format(*) with every line containing a JSON object describing timestamp, log
level and log payload of corresponding log entry.

So far all is good. One peculiarity, however, is that, contrary to everything
else, the content of the `data` field in the logs is not emitted via
JSON-encoding. Initially, starting from 4968d55c (ors-amarisoft: get eNB stats
and add promises), that `data` payload was being emitted via %s py formatting,
which is similar to what JSON-encoding would produce, but is not it exactly.
For example if log payload is {'abc': 123} then %s and JSON would quote `abc`
differently:

    {'abc': 123}  # %s
    {"abc": 123}  # JSON

That would not be a big deal if what %s produces would be still valid JSON
accepted by JSON-decoder, but unfortunately it is not so: JSONDecodeError is
raised when {'abc': 123} is tried to be decoded.

The code in slapos.toolbox, so far, is handling this situation by feeding to
JSON-decoder original input with ' characters replaced by "

    json.loads(line.decode().replace("'", '"'))     (+)

which more or less works in basic cases, but breaks when log payload contains '
character itself. For example for {"a'bc": 123} replacing gives {"a"bc": 123},
which, being invalid JSON, raises

    JSONDecodeError: Expecting ':' delimiter: line 1 column 5 (char 4)

when fed to json.loads() .

I've actually hit this problem for real with amarisoft-rf-info.json.log where
the data part contains output of `rf_info` looking e.g. as

    TX underflows=0 RX overflows=0
    TRX SDR driver 2023-09-07, API v15/18
    PCIe CPRI /dev/sdr0@0:
      Hardware ID: 0x4b12
      DNA: [0x0048248a334a7054]
      Serial: ''
      FPGA revision: 2023-06-23  10:05:24
      FPGA vccint: 0.98 V
      FPGA vccaux: 1.76 V
      FPGA vccbram: 0.98 V
      FPGA temperature: 71.9 °C
      Clock tune: 0.0 ppm
      NUMA: 0
      CPRI_option: '5' (x8) lock=no
      DMA0: TX fifo: 66.67us  Usage=16/32768 (0%)
      DMA0: RX fifo: 66.67us  Usage=16/32768 (0%)
      DMA0 Underflows: 0
      DMA0 Overflows: 0
    PCIe CPRI /dev/sdr0@1:
      Hardware ID: 0x4b12
      DNA: [0x0048248a334a7054]
      Serial: ''
      FPGA revision: 2023-06-23  10:05:24
      FPGA vccint: 0.98 V
      FPGA vccaux: 1.77 V
      FPGA vccbram: 0.98 V
      FPGA temperature: 71.7 °C
      Clock tune: 0.0 ppm
      NUMA: 0
      CPRI_option: '5' (x8) lock=HW+SW rx/tx=46.606us
        Port #0: T14=46.606us
      DMA0: TX fifo: 66.67us  Usage=16/32768 (0%)
      DMA0: RX fifo: 66.67us  Usage=16/32768 (0%)
      DMA0 Underflows: 0
      DMA0 Overflows: 0

which lead to breakage when trying to load that log due to ' symbols.

The fix is simple: change the code, that emits *.json.log to emit data payload
via json.dumps instead of %s. We can do that without breaking anything because,
contrary to enb.xlog, those *.json.log are not uploaded to any separate system
and currently are only used to implement promises without preserving log files
in any storage for any non-small amount of time. In other words, currently
those *.json.log are temporary files whose format can be adjusted without
strongly caring about backward compatibility.

b32b4a8e (software/ors-amarisoft: general improvement for RU (logs/promises/input parameters))
already changed %s to json.dumps for amarisoft-stats.json.log , but left all other logs at %s.

-> Fix this for everything by replacing %s to json.dumps in all generated logs.
-> I will also adjust slapos.toolbox accordingly.

/cc @jhuge, @lu.xu, @tomo, @xavier_thompson, @Daetalus, @jerome

(*) https://jsonlines.org/
(+) see e.g. https://lab.nexedi.com/nexedi/slapos.toolbox/blob/453dce5f/slapos/promise/plugin/util.py#L50
---
 .../ors-amarisoft/amarisoft-rf-info.jinja2.py     |  2 +-
 software/ors-amarisoft/buildout.hash.cfg          |  8 ++++----
 .../ors-amarisoft/lopcomm-rrh-software.jinja2.py  |  7 ++++---
 .../lopcomm-rrh-supervision.jinja2.py             |  5 +++--
 software/ors-amarisoft/ncclient_common.py         | 15 ++++++++-------
 5 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/software/ors-amarisoft/amarisoft-rf-info.jinja2.py b/software/ors-amarisoft/amarisoft-rf-info.jinja2.py
index 422a34703..142ef0407 100644
--- a/software/ors-amarisoft/amarisoft-rf-info.jinja2.py
+++ b/software/ors-amarisoft/amarisoft-rf-info.jinja2.py
@@ -49,7 +49,7 @@ class enbWebSocket:
                 "rf_info": True
             })
             r = self.recv('rf')
-        self.logger.info('RF info', extra={'data': r})
+        self.logger.info('RF info', extra={'data': json.dumps(r)})
 
 if __name__ == '__main__':
     ws = enbWebSocket()
diff --git a/software/ors-amarisoft/buildout.hash.cfg b/software/ors-amarisoft/buildout.hash.cfg
index 0487b1509..904b81c27 100644
--- a/software/ors-amarisoft/buildout.hash.cfg
+++ b/software/ors-amarisoft/buildout.hash.cfg
@@ -24,11 +24,11 @@ md5sum = c4d5e9fcf460d88bc2b4bcfbdfe554f7
 
 [amarisoft-rf-info.jinja2.py]
 _update_hash_filename_ = amarisoft-rf-info.jinja2.py
-md5sum = c930c28365c685a6066f382c9b5d8893
+md5sum = ab666fdfadbfc7d8a16ace38d295c883
 
 [ncclient_common]
 _update_hash_filename_ = ncclient_common.py
-md5sum = f34a196e947fa58b141431a00cc744df
+md5sum = 63316f6a8a6a480e10db7e78cca72f3b
 
 [lopcomm-rrh-stats.jinja2.py]
 _update_hash_filename_ = lopcomm-rrh-stats.jinja2.py
@@ -40,11 +40,11 @@ md5sum = f2f550b68c8ab243ce1a4bb73a9abc1c
 
 [lopcomm-rrh-software.jinja2.py]
 _update_hash_filename_ = lopcomm-rrh-software.jinja2.py
-md5sum = 860fa5e5bab65f414535c7e25f622a6c
+md5sum = 2cd8515253b75e2ab13cc77399762851
 
 [lopcomm-rrh-supervision.jinja2.py]
 _update_hash_filename_ = lopcomm-rrh-supervision.jinja2.py
-md5sum = a2ba0343ddb7f9cf2904a4c5c751f68a
+md5sum = 243d9fbf640b8dc8bf63d69b07b8afed
 
 [template-enb]
 _update_hash_filename_ = instance-enb.jinja2.cfg
diff --git a/software/ors-amarisoft/lopcomm-rrh-software.jinja2.py b/software/ors-amarisoft/lopcomm-rrh-software.jinja2.py
index 760f79459..bda970822 100644
--- a/software/ors-amarisoft/lopcomm-rrh-software.jinja2.py
+++ b/software/ors-amarisoft/lopcomm-rrh-software.jinja2.py
@@ -1,5 +1,6 @@
 #!{{ python_path }}
 import time
+import json
 import xmltodict
 import sys
 import re
@@ -60,7 +61,7 @@ if __name__ == '__main__':
                       if download_reply_xml:
                           nc.logger.info("Download proceed.")
                           download_data = xmltodict.parse(download_reply_xml)
-                          nc.software_reply_json_logger.info('', extra={'data': download_data})
+                          nc.software_reply_json_logger.info('', extra={'data': json.dumps(download_data)})
 
                       install_rpc_xml = f"""
                           <software-install xmlns="urn:o-ran:software-management:1.0">
@@ -74,7 +75,7 @@ if __name__ == '__main__':
                       if install_reply_xml:
                           nc.logger.info("Installation proceed.")
                           install_data = xmltodict.parse(install_reply_xml)
-                          nc.software_reply_json_logger.info('', extra={'data': install_data})
+                          nc.software_reply_json_logger.info('', extra={'data': json.dumps(install_data)})
 
                       if nonrunning_slot_name_build_version in "{{firmware_name}}":
                           activate_rpc_xml = f"""
@@ -88,7 +89,7 @@ if __name__ == '__main__':
                           if activate_reply_xml:
                               nc.logger.info("Activation proceed.")
                               activate_data = xmltodict.parse(activate_reply_xml)
-                              nc.software_reply_json_logger.info('', extra={'data': activate_data})
+                              nc.software_reply_json_logger.info('', extra={'data': json.dumps(activate_data)})
 
                       nc.get_inventory()
                       if nonrunning_slot_name_build_version in "{{firmware_name}}" and active_nonrunning_slot_name:
diff --git a/software/ors-amarisoft/lopcomm-rrh-supervision.jinja2.py b/software/ors-amarisoft/lopcomm-rrh-supervision.jinja2.py
index 33fe5d58b..f69a5394e 100644
--- a/software/ors-amarisoft/lopcomm-rrh-supervision.jinja2.py
+++ b/software/ors-amarisoft/lopcomm-rrh-supervision.jinja2.py
@@ -1,5 +1,6 @@
 #!{{ python_path }}
 import time
+import json
 import xmltodict
 import sys
 import re
@@ -27,7 +28,7 @@ if __name__ == '__main__':
         if supervision_subscription_reply_xml:
             nc.logger.info("Subscription created")
             supervision_subscription_data = xmltodict.parse(supervision_subscription_reply_xml)
-            nc.supervision_reply_json_logger.info('', extra={'data': supervision_subscription_data})
+            nc.supervision_reply_json_logger.info('', extra={'data': json.dumps(supervision_subscription_data)})
             while True:
               supervision_watchdog_rpc_xml = """
                   <supervision-watchdog-reset xmlns="urn:o-ran:supervision:1.0">
@@ -43,7 +44,7 @@ if __name__ == '__main__':
                       nc.logger.info("NETCONF server replied")
                       
                       supervision_watchdog_data = xmltodict.parse(supervision_watchdog_reply_xml)
-                      nc.supervision_reply_json_logger.info('', extra={'data': supervision_watchdog_data})
+                      nc.supervision_reply_json_logger.info('', extra={'data': json.dumps(supervision_watchdog_data)})
                       # It must be the same interval as <supervision-notification-interval>
                       time.sleep(60)
                   else:
diff --git a/software/ors-amarisoft/ncclient_common.py b/software/ors-amarisoft/ncclient_common.py
index 2433c798c..28cbeadef 100644
--- a/software/ors-amarisoft/ncclient_common.py
+++ b/software/ors-amarisoft/ncclient_common.py
@@ -1,5 +1,6 @@
 import time
 import logging
+import json
 import xmltodict
 from logging.handlers import RotatingFileHandler
 from ncclient import manager
@@ -111,15 +112,15 @@ class LopcommNetconfClient:
               result_in_xml = result._raw
               data_dict = xmltodict.parse(result_in_xml)
               if 'alarm-notif' in data_dict['notification']:
-                self.json_logger.info('', extra={'data': data_dict})
+                self.json_logger.info('', extra={'data': json.dumps(data_dict)})
               elif 'supervision-notification' in data_dict['notification']:
-                self.supervision_json_logger.info('', extra={'data': data_dict})
+                self.supervision_json_logger.info('', extra={'data': json.dumps(data_dict)})
               elif 'netconf-session-start' in data_dict['notification'] or 'netconf-session-end' in data_dict['notification']:
-                self.ncsession_json_logger.info('', extra={'data': data_dict})
+                self.ncsession_json_logger.info('', extra={'data': json.dumps(data_dict)})
               elif any(event in data_dict['notification'] for event in ['install-event', 'activation-event', 'download-event']):
-                  self.software_json_logger.info('', extra={'data': data_dict})
+                  self.software_json_logger.info('', extra={'data': json.dumps(data_dict)})
               else:
-                self.cfg_json_logger.info('', extra={'data': data_dict})
+                self.cfg_json_logger.info('', extra={'data': json.dumps(data_dict)})
     def edit_config(self, config_files):
         for config_file in config_files:
             with open(config_file) as f:
@@ -152,7 +153,7 @@ class LopcommNetconfClient:
         reset_reply_xml = self.custom_rpc_request(reset_rpc_xml)
         if reset_reply_xml:
             reset_data = xmltodict.parse(reset_reply_xml)
-            self.software_reply_json_logger.info('', extra={'data': reset_data})
+            self.software_reply_json_logger.info('', extra={'data': json.dumps(reset_data)})
         self.logger.info('Wait 60 second then reboot!')
         time.sleep(60)
 
@@ -169,7 +170,7 @@ class LopcommNetconfClient:
         if inventory_reply_xml:
             self.logger.info('Finish fetching software inventory.')
             inventory_data = xmltodict.parse(inventory_reply_xml)
-            self.software_reply_json_logger.info('', extra={'data': inventory_data})
+            self.software_reply_json_logger.info('', extra={'data': json.dumps(inventory_data)})
 
         nonrunning_slot_name = None
         running_slot_name = None
-- 
2.30.9