Commit 2b772c13 authored by Bella Bah, committed by Commit Bot

Implement automatic document generation for Network Traffic Annotations

This generator uses grouping.xml and annotations.tsv to produce a
Google Docs document for sys-admin clients. Refer to the README for
additional guidance.

Bug: 1107860
Change-Id: Ia9350fdc049e0e4b0ac2830968d9e60309bc3c27
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2283487
Commit-Queue: Mohamadou Bella Bah <bellabah@chromium.org>
Reviewed-by: Nicolas Ouellet-Payeur <nicolaso@chromium.org>
Reviewed-by: Ramin Halavati <rhalavati@chromium.org>
Cr-Commit-Position: refs/heads/master@{#795068}
parent 04d1ee45
@@ -289,7 +289,7 @@ change list. These checks include:
`tools/traffic_annotation/summary/grouping.xml`. When adding a new annotation,
it must also be included in `grouping.xml` for reporting purposes (please
refer to the **Annotations Review**).
### Presubmit tests
To perform these tests prior to submitting, one can use the `traffic_annotation_auditor`
@@ -324,7 +324,7 @@ one is updated, or deleted, this file should also be updated. To update the
as specified in presubmit tests. But if it is not possible to do so (e.g., if
you are changing the code from an unsupported platform or you don’t have a
compiled build directory), the code can be submitted to the trybot and the test
on trybot will tell you the required modifications.
To help make external reports easier, annotation unique ids should be
mentioned in `tools/traffic_annotation/summary/grouping.xml`. Once a new
......
@@ -29,3 +29,25 @@ annotations in code. It uses regular expressions on source files.
# extractor_test.py
Unit tests for extractor.py.
# update_annotations_doc.py
Updates the Chrome Browser Network Traffic Annotations document that presents
all network traffic annotations specified within `summary/grouping.xml`.
- You can use the `hidden="true"` attribute on a group to suppress the group
  and its nested senders and annotations from appearing in the document (see
  the sketch below).
- You can use the `hidden="true"` attribute on an individual annotation in
  `grouping.xml` to suppress just that annotation from appearing in the
  document.
- `grouping.xml` need not be kept in alphabetical order; the script
  automatically places the entries in alphabetical order.
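For example, a hidden group and a hidden annotation might look like this (a
minimal sketch; the group, sender, and `unique_id` values are made up):

```xml
<groups>
  <!-- Hidden group: this group and everything nested in it is suppressed. -->
  <group name="Example Internal Group" hidden="true">
    <sender name="Example Sender">
      <traffic_annotation unique_id="example_internal_annotation"/>
    </sender>
  </group>
  <group name="Example Visible Group">
    <sender name="Another Sender">
      <!-- Hidden annotation: only this entry is suppressed. -->
      <traffic_annotation unique_id="example_hidden_annotation" hidden="true"/>
      <traffic_annotation unique_id="example_visible_annotation"/>
    </sender>
  </group>
</groups>
```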
# update_annotations_doc_tests.py
Unit tests for update_annotations_doc.py.
# parser.py
Parses the `grouping.xml` and `annotations.tsv` files to provide
`update_annotations_doc.py` with each annotation's relevant information
(unique_id, data, trigger, etc.). Also includes methods to parse the JSON
object returned by the Google Docs API `get()` method.
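A minimal usage sketch (the file paths here are hypothetical; the function
names are the ones exercised by the unit tests below):

```python
import parser

# Load the annotations table, build the unique_id -> TrafficAnnotation
# mapping, and produce the ordered group/sender/annotation placeholders that
# update_annotations_doc.py converts into Google Docs API requests.
rows = parser.load_tsv_file("path/to/annotations.tsv", False)  # False: not verbose
annotations = parser.map_annotations(rows)
xml_parser = parser.XMLParser("path/to/grouping.xml", annotations)
placeholders = xml_parser.build_placeholders()
```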
# parser_tests.py
Unit tests for parser.py.
\ No newline at end of file
#!/usr/bin/env python
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Unit tests for parser.py
"""
import unittest
import parser
import os
# Absolute path to chrome/src.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
SRC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "../../.."))
TESTS_DIR = os.path.join(SCRIPT_DIR, "test_data")
class ParserTest(unittest.TestCase):
TSV_CONTENTS = [
[
u"unique_id_A", u"", u"sender_A", u"description_A", u"trigger_A",
u"data_A", u"destination_A", u"cookies_allowed_A", u"cookies_store_A",
u"settings_A", u"chrome_policy_A", u"", u"source_file_A",
u"id_hash_code_A", u"content_hash_code_A"],
[
u"unique_id_B", u"", u"sender_B", u"description_B", u"trigger_B",
u"data_B", u"destination_B", u"cookies_allowed_B", u"cookies_store_B",
u"settings_B", u"chrome_policy_B", u"", u"source_file_B",
u"id_hash_code_B", u"content_hash_code_B"],
[
u"unique_id_C", u"", u"sender_C", u"description_C", u"trigger_C",
u"data_C", u"destination_C", u"cookies_allowed_C", u"cookies_store_C",
u"settings_C", u"chrome_policy_C", u"", u"source_file_C",
u"id_hash_code_C", u"content_hash_code_C"]
]
ANNOTATIONS_MAPPING = {
"unique_id_A": parser.TrafficAnnotation(**{
"unique_id": "unique_id_A",
"description": "description_A",
"trigger": "trigger_A",
"data": "data_A",
"settings": "settings_A",
"policy": "chrome_policy_A"}),
"unique_id_B": parser.TrafficAnnotation(**{
"unique_id": "unique_id_B",
"description": "description_B",
"trigger": "trigger_B",
"data": "data_B",
"settings": "settings_B",
"policy": "chrome_policy_B"}),
"unique_id_C": parser.TrafficAnnotation(**{
"unique_id": "unique_id_C",
"description": "description_C",
"trigger": "trigger_C",
"data": "data_C",
"settings": "settings_C",
"policy": "chrome_policy_C"})
}
PLACEHOLDERS = [
{"type": parser.Placeholder.GROUP, "name": "Group A"},
{"type": parser.Placeholder.SENDER, "name": "Sender 1"},
{
"type": parser.Placeholder.ANNOTATION,
"traffic_annotation": ANNOTATIONS_MAPPING["unique_id_A"]},
{"type": parser.Placeholder.SENDER, "name": "Sender 2"},
{
"type": parser.Placeholder.ANNOTATION,
"traffic_annotation": ANNOTATIONS_MAPPING["unique_id_B"]},
{"type": parser.Placeholder.GROUP, "name": "Group C"},
{"type": parser.Placeholder.SENDER, "name": "Sender 3"},
{
"type": parser.Placeholder.ANNOTATION,
"traffic_annotation": ANNOTATIONS_MAPPING["unique_id_C"]}
]
# Document formatted according to fake_grouping.xml
DOC_JSON = parser.extract_body(
target="all", json_file_path=os.path.join(TESTS_DIR, "fake_doc.json"))
def test_load_tsv_file(self):
self.assertEqual(self.TSV_CONTENTS, parser.load_tsv_file(os.path.join(
SRC_DIR,
"tools/traffic_annotation/scripts/test_data/fake_annotations.tsv"),
False))
def test_map_annotations(self):
self.assertEqual(
self.ANNOTATIONS_MAPPING, parser.map_annotations(self.TSV_CONTENTS))
def test_xml_parser_build_placeholders(self):
xml_parser = parser.XMLParser(
os.path.join(TESTS_DIR, "fake_grouping.xml"), self.ANNOTATIONS_MAPPING)
self.assertEqual(self.PLACEHOLDERS, xml_parser.build_placeholders())
def test_find_first_index(self):
first_index = parser.find_first_index(self.DOC_JSON)
self.assertEqual(1822, first_index)
def test_find_last_index(self):
last_index = parser.find_last_index(self.DOC_JSON)
self.assertEqual(2066, last_index)
def test_find_chrome_browser_version(self):
current_version = parser.find_chrome_browser_version(self.DOC_JSON)
self.assertEqual("86.0.4187.0", current_version)
def test_find_bold_ranges(self):
expected_bold_ranges = [
(1843, 1855), (1859, 1867), (1871, 1876), (1880, 1889), (1893, 1900),
(1918, 1930), (1934, 1942), (1968, 1975), (1946, 1951), (1955, 1964),
(2001, 2013), (2017, 2025), (2029, 2034), (2038, 2047), (2051, 2058)]
bold_ranges = parser.find_bold_ranges(self.DOC_JSON)
self.assertItemsEqual(expected_bold_ranges, bold_ranges)
if __name__ == "__main__":
unittest.main()
\ No newline at end of file
unique_id_A sender_A description_A trigger_A data_A destination_A cookies_allowed_A cookies_store_A settings_A chrome_policy_A source_file_A id_hash_code_A content_hash_code_A
unique_id_B sender_B description_B trigger_B data_B destination_B cookies_allowed_B cookies_store_B settings_B chrome_policy_B source_file_B id_hash_code_B content_hash_code_B
unique_id_C sender_C description_C trigger_C data_C destination_C cookies_allowed_C cookies_store_C settings_C chrome_policy_C source_file_C id_hash_code_C content_hash_code_C
<groups>
<group name="Group A">
<sender name="Sender 1">
<traffic_annotation unique_id="unique_id_A"/>
</sender>
<sender name="Sender 2">
<traffic_annotation unique_id="unique_id_B"/>
</sender>
</group>
<group name="Group C">
<sender name="Sender 3">
<traffic_annotation unique_id="unique_id_C"/>
</sender>
</group>
</groups>
\ No newline at end of file
python_version: "2.7"
wheel: <
name: "infra/python/wheels/google_api_python_client-py2_py3"
version: "version:1.6.2"
>
wheel: <
name: "infra/python/wheels/oauth2client-py2_py3"
version: "version:4.0.0"
>
wheel: <
name: "infra/python/wheels/uritemplate-py2_py3"
version: "version:3.0.0"
>
wheel: <
name: "infra/python/wheels/enum34-py2"
version: "version:1.1.6"
>
wheel: <
name: "infra/python/wheels/httplib2-py2_py3"
version: "version:0.12.1"
>
wheel: <
name: "infra/python/wheels/rsa-py2_py3"
version: "version:3.4.2"
>
wheel: <
name: "infra/python/wheels/pyasn1-py2_py3"
version: "version:0.2.3"
>
wheel: <
name: "infra/python/wheels/pyasn1_modules-py2_py3"
version: "version:0.0.8"
>
wheel: <
name: "infra/python/wheels/six-py2_py3"
version: "version:1.10.0"
>
wheel: <
name: "infra/python/wheels/infra_libs-py2"
version: "version:2.0.0"
>
wheel: <
name: "infra/python/wheels/protobuf-py2_py3"
version: "version:3.2.0"
>
wheel: <
name: "infra/python/wheels/requests-py2_py3"
version: "version:2.13.0"
>
\ No newline at end of file
#!/usr/bin/env python
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Unit tests for update_annotations_doc.py
"""
import os
import sys
import unittest
from mock import MagicMock
# Mock some imports which aren't necessary during testing.
sys.modules["infra_libs"] = MagicMock()
import update_annotations_doc
import parser
# Absolute path to chrome/src.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
SRC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "../../.."))
TESTS_DIR = os.path.join(SCRIPT_DIR, "test_data")
class UpdateAnnotationsDocTest(unittest.TestCase):
network_doc_obj = update_annotations_doc.NetworkTrafficAnnotationsDoc(
"", "", "", "", "")
def test_create_group_request(self):
text = "TestGroup"
req, index = self.network_doc_obj._create_group_or_sender_request(
text, 0, parser.Placeholder.GROUP)
self.assertEqual(len(text)+1, index)
expected_req = [
{"insertText": {"text": "TestGroup\n", "location": {"index": 0}}},
{"updateParagraphStyle": {
"fields": "*",
"range": {"endIndex": 10, "startIndex": 0},
"paragraphStyle": {
"spacingMode": "NEVER_COLLAPSE",
"direction": "LEFT_TO_RIGHT",
"namedStyleType": "HEADING_1",
"spaceAbove": {"unit": "PT"}
}
}
},
{"updateTextStyle": {
"textStyle": {
"fontSize": {"magnitude": 20, "unit": "PT"},
"bold": False,
"weightedFontFamily": {
"fontFamily": "Roboto",
"weight": 400
}
},
"range": {"endIndex": 10, "startIndex": 0}, "fields": "*"}}
]
self.assertEqual(expected_req, req)
def test_create_sender_request(self):
text = "TestSender"
req, index = self.network_doc_obj._create_group_or_sender_request(
text, 0, parser.Placeholder.SENDER)
self.assertEqual(len(text)+1, index)
expected_req = [
{"insertText": {"text": "TestSender\n", "location": {"index": 0}}},
{"updateParagraphStyle": {
"fields": "*",
"range": {"endIndex": 11, "startIndex": 0},
"paragraphStyle": {
"spacingMode": "NEVER_COLLAPSE",
"direction": "LEFT_TO_RIGHT",
"namedStyleType": "HEADING_2",
"spaceAbove": {"unit": "PT"}
}
}
},
{"updateTextStyle": {
"textStyle": {"fontSize": {"magnitude": 14, "unit": "PT"},
"bold": True,
"weightedFontFamily": {"fontFamily": "Roboto", "weight": 400}},
"range": {"endIndex": 11, "startIndex": 0}, "fields": "*"}
}
]
self.assertEqual(expected_req, req)
def test_create_annotation_request(self):
traffic_annotation = parser.TrafficAnnotation(**{
"unique_id": "unique_id_A",
"description": "description_A",
"trigger": "trigger_A",
"data": "data_A",
"settings": "settings_A",
"policy": "chrome_policy_A"})
req, index = self.network_doc_obj._create_annotation_request(
traffic_annotation, 0)
self.assertEqual(109, index)
expected_req = [
{'insertText': {'text': '\n', 'location': {'index': 0}}},
{'insertTable': {'rows': 1, 'location': {'index': 0}, 'columns': 2}},
{'insertText': {'text': 'unique_id_A', 'location': {'index': 4}}},
{
'insertText': {
'text': "description_A\nTrigger: trigger_A\nData: data_A\nSettings: "
"settings_A\nPolicy: chrome_policy_A", 'location': {'index': 17}}},
{'updateTableColumnProperties': {
'columnIndices': [0],
'fields': '*',
'tableColumnProperties': {
'width': {'magnitude': 153, 'unit': 'PT'},
'widthType': 'FIXED_WIDTH'},
'tableStartLocation': {'index': 1}}},
{'updateTableColumnProperties': {
'columnIndices': [1],
'fields': '*',
'tableColumnProperties': {
'width': {'magnitude': 534, 'unit': 'PT'},'widthType': 'FIXED_WIDTH'},
'tableStartLocation': {'index': 1}}},
{'updateTableCellStyle': {
'fields': '*',
'tableCellStyle': {
'rowSpan': 1,
'borderBottom': {
'color': {
'color': {'rgbColor': {'blue': 1.0, 'green': 1.0, 'red': 1.0}}},
'width': {'unit': 'PT'}, 'dashStyle': 'SOLID'},
'paddingBottom': {'magnitude': 1.44, 'unit': 'PT'},
'paddingLeft': {'magnitude': 1.44, 'unit': 'PT'},
'paddingTop': {'magnitude': 1.44, 'unit': 'PT'},
'borderLeft': {
'color': {
'color': {'rgbColor': {'blue': 1.0, 'green': 1.0, 'red': 1.0}}},
'width': {'unit': 'PT'},
'dashStyle': 'SOLID'},
'columnSpan': 1,
'backgroundColor': {
'color': {'rgbColor': {'blue': 1.0, 'green': 1.0, 'red': 1.0}}},
'borderRight': {
'color': {
'color': {'rgbColor': {'blue': 1.0, 'green': 1.0, 'red': 1.0}}},
'width': {'unit': 'PT'},
'dashStyle': 'SOLID'},
'borderTop': {
'color': {
'color': {'rgbColor': {'blue': 1.0, 'green': 1.0, 'red': 1.0}}},
'width': {'unit': 'PT'},
'dashStyle': 'SOLID'},
'paddingRight': {'magnitude': 1.44, 'unit': 'PT'}},
'tableStartLocation': {'index': 1}}},
{'updateParagraphStyle': {
'fields': '*',
'range': {'endIndex': 108, 'startIndex': 4},
'paragraphStyle': {
'spacingMode': 'NEVER_COLLAPSE',
'direction': 'LEFT_TO_RIGHT',
'spaceBelow': {'magnitude': 4, 'unit': 'PT'},
'lineSpacing': 100,
'avoidWidowAndOrphan': False,
'namedStyleType': 'NORMAL_TEXT'}}},
{'updateTextStyle': {
'textStyle': {'fontSize': {'magnitude': 9, 'unit': 'PT'},
'bold': False,
'weightedFontFamily': {'fontFamily': 'Roboto', 'weight': 400}},
'range': {'endIndex': 108, 'startIndex': 4}, 'fields': '*'}}]
self.assertEqual(expected_req, req)
if __name__ == "__main__":
unittest.main()
\ No newline at end of file
@@ -29,6 +29,7 @@ from infra_libs import luci_auth
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from parser import load_tsv_file
class SheetEditor():
@@ -336,37 +337,6 @@ class SheetEditor():
self.insert_count, self.update_count, self.delete_count)
def utf_8_encoder(input_file):
for line in input_file:
yield line.encode("utf-8")
def LoadTSVFile(file_path, verbose):
""" Loads annotations TSV file.
Args:
file_path: str Path to the TSV file.
verbose: bool Whether to print messages about ignored rows.
Returns:
list of list Table of loaded annotations.
"""
rows = []
with io.open(file_path, mode="r", encoding="utf-8") as csvfile:
# CSV library does not support unicode, so encoding to utf-8 and back.
reader = csv.reader(utf_8_encoder(csvfile), delimiter='\t')
for row in reader:
row = [unicode(col, 'utf-8') for col in row]
# If the last column of the file_row is empty, the row belongs to a
# platform different from the one that TSV file is generated on, hence it
# should be ignored.
if row[-1]:
rows.append(row)
elif verbose:
print("Ignored from other platforms: %s" % row[0])
return rows
def PrintConfigHelp():
print("The config.json file should have the following items:\n"
"spreadsheet_id:\n"
@@ -416,7 +386,7 @@ def main():
config = json.load(config_file)
# Load and parse annotations file.
file_content = LoadTSVFile(args.annotations_file, args.verbose)
file_content = load_tsv_file(args.annotations_file, args.verbose)
if not file_content:
print("Could not read annotations file.")
return -1
......