Count cycles in Android package dependency graphs

A new script has been added to count and optionally output cycles in package graphs. A maximum cycle length must be supplied, ideally around 5 or 6 to keep the script runtime low, since the number of cycles can grow exponentially with the cycle length. For more detailed usage, see `./count_cycles.py -h`. Bug: 1106484 Change-Id: I01923eaab24eb3c6641a23039c80f60a0bc0e212 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2303054 Commit-Queue: James Long <yjlong@google.com> Reviewed-by: Samuel Huang <huangs@chromium.org> Reviewed-by: Mohamed Heikal <mheikal@chromium.org> Cr-Commit-Position: refs/heads/master@{#791996}

Count cycles in Android package dependency graphs
A new script has been added to count and optionally output cycles in package graphs. A maximum cycle length must be supplied, ideally around 5 or 6 to keep the script runtime low, since the number of cycles can grow exponentially with the cycle length. For more detailed usage, see `./count_cycles.py -h`. Bug: 1106484 Change-Id: I01923eaab24eb3c6641a23039c80f60a0bc0e212 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2303054 Commit-Queue: James Long <yjlong@google.com> Reviewed-by: Samuel Huang <huangs@chromium.org> Reviewed-by: Mohamed Heikal <mheikal@chromium.org> Cr-Commit-Position: refs/heads/master@{#791996}
a0465fa2 · James Long · Commit Bot · d6265a0a · a0465fa2 · a0465fa2
Commit a0465fa2 authored Jul 27, 2020 by James Long Committed by Commit Bot Jul 27, 2020
3 changed files
--- a/tools/android/dependency_analysis/count_cycles.py
+++ b/tools/android/dependency_analysis/count_cycles.py
+#!/usr/bin/env python3
+# Copyright 2020 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Command-line tool to enumerate cycles in Graph structures."""
+
+import argparse
+import collections
+
+from typing import Dict, List
+
+import serialization
+import graph
+
+
+def find_cycles_from_node(
+    start_node: graph.Node,
+    max_cycle_length: int,
+    node_to_id: Dict[graph.Node, int],
+) -> List[List[List[graph.Node]]]:
+    """Finds all cycles starting at |start_node| in a subset of nodes.
+
+    Only nodes with ID >= |start_node|'s ID will be considered. This ensures
+    uniquely counting all cycles since this function is called on all nodes of
+    the graph, one at a time in increasing order. Some justification: Consider
+    cycle C with smallest node n. When this function is called on node n, C will
+    be found since all nodes of C are >= n. After that call, C will never be
+    found again since further calls are on nodes > n (n is removed from the
+    search space).
+
+    Cycles are found by recursively scanning all outbound nodes starting from
+    |start_node|, up to a certain depth. Note this is the same idea, but is
+    different from DFS since nodes can be visited more than once (to avoid
+    missing cycles). An example of normal DFS (where nodes can only be visited
+    once) missing cycles is in the following graph, starting at a:
+    a <-> b <-> c
+    ^           ^
+    |           |
+    +-----------+
+    DFS(a)
+        DFS(b)
+            DFS(a) (cycle aba, return)
+            DFS(c)
+                DFS(b) (already seen, return)
+                DFS(a) (cycle abca, return)
+        DFS(c) (already seen, return)
+    Since DFS(c) cannot proceed, we miss the cycles aca and acba.
+
+    Args:
+        start_node: The node to start the cycle search from. Only nodes with ID
+          >= |start_node|'s ID will be considered.
+        max_cycle_length: The maximum length of cycles to be found.
+        node_to_id: A map from a Node to a generated ID.
+
+    Returns:
+        A list |cycles| of length |max_cycle_length| + 1, where cycles[i]
+          contains all relevant cycles of length i.
+    """
+    start_node_id = node_to_id[start_node]
+    cycles = [[] for _ in range(max_cycle_length + 1)]
+
+    def edge_is_interesting(start: graph.Node, end: graph.Node) -> bool:
+        if start == end:
+            # Ignore self-loops.
+            return False
+        if node_to_id[end] < start_node_id:
+            # Ignore edges ending at nodes with ID lower than the start.
+            return False
+        return True
+
+    dfs_stack = collections.deque()
+    on_stack: Dict[graph.Node, bool] = collections.defaultdict(bool)
+
+    def find_cycles_dfs(cur_node: graph.Node, cur_length: int):
+        for other_node in cur_node.outbound:
+            if edge_is_interesting(cur_node, other_node):
+                if other_node == start_node:
+                    # We have found a valid cycle, add it to the list.
+                    new_cycle = list(dfs_stack) + [cur_node, start_node]
+                    cycles[cur_length + 1].append(new_cycle)
+
+                elif (not on_stack[other_node]
+                      and cur_length + 1 < max_cycle_length):
+                    # We are only allowed to recurse into the next node if:
+                    # 1) It hasn't been visited in the current cycle. This is
+                    # because if the next node n _has_ been visited in the
+                    # current cycle (i.e., it's on the stack), then we have
+                    # found a cycle starting and ending at n. Since this
+                    # function only returns cycles starting at |start_node|, we
+                    # only care if |n = start_node| (which we already detect
+                    # above).
+                    # 2) It would not exceed the maximum depth allowed.
+                    dfs_stack.append(cur_node)
+                    on_stack[cur_node] = True
+                    find_cycles_dfs(other_node, cur_length + 1)
+                    dfs_stack.pop()
+                    on_stack[cur_node] = False
+
+    find_cycles_dfs(start_node, 0)
+    return cycles
+
+
+def find_cycles(base_graph: graph.Graph,
+                max_cycle_length: int) -> List[List[List[graph.Node]]]:
+    """Finds all cycles in the graph within a certain length.
+
+    The algorithm is as such: Number the nodes arbitrarily. For i from 0 to
+    the number of nodes, find all cycles starting and ending at node i using
+    only nodes with numbers >= i (see find_cycles_from_node). Taking the union
+    of the results will give all relevant cycles in the graph.
+
+    Returns:
+        A list |cycles| of length |max_cycle_length| + 1, where cycles[i]
+          contains all cycles of length i.
+    """
+    sorted_base_graph_nodes = sorted(base_graph.nodes)
+    # Some preliminary setup: map between the graph nodes' unique keys and a
+    # unique number, since the algorithm needs some way to decide when a node is
+    # 'bigger'. Nodes with a lower number will be processed first, which
+    # influences the output cycles. For example, the cycle abca is also valid as
+    # the cycle bcab or cabc. By numbering node a lower than b and c, it is
+    # guaranteed that the cycle will be output as abca.
+    node_to_id = {}
+    for generated_node_id, node in enumerate(sorted_base_graph_nodes):
+        node_to_id[node] = generated_node_id
+
+    num_nodes = base_graph.num_nodes
+    cycles = [[] for _ in range(max_cycle_length + 1)]
+
+    for start_node in sorted_base_graph_nodes:
+        start_node_cycles = find_cycles_from_node(start_node, max_cycle_length,
+                                                  node_to_id)
+        for cycle_length, cycle_list in enumerate(start_node_cycles):
+            cycles[cycle_length].extend(cycle_list)
+    return cycles
+
+
+def main():
+    """Enumerates the cycles within a certain length in a graph."""
+
+    arg_parser = argparse.ArgumentParser(
+        description='Given a JSON dependency graph, count the number of cycles '
+        'in the package graph.')
+    required_arg_group = arg_parser.add_argument_group('required arguments')
+    required_arg_group.add_argument(
+        '-f',
+        '--file',
+        required=True,
+        help='Path to the JSON file containing the dependency graph. '
+        'See the README on how to generate this file.')
+    required_arg_group.add_argument(
+        '-l',
+        '--cycle-length',
+        type=int,
+        required=True,
+        help='The maximum length of cycles to find, at most 5 or 6 to keep the '
+        'script runtime low.')
+    arg_parser.add_argument(
+        '-o',
+        '--output',
+        type=argparse.FileType('w'),
+        help='Path to the file to write the list of cycles to.')
+    args = arg_parser.parse_args()
+
+    _, package_graph = serialization.load_class_and_package_graphs_from_file(
+        args.file)
+
+    all_cycles = find_cycles(package_graph, args.cycle_length)
+    # There are no cycles of length 0 or 1 (since self-loops are disallowed).
+    nonzero_cycles = all_cycles[2:]
+
+    print(f'Found {sum(len(cycles) for cycles in nonzero_cycles)} cycles.')
+
+    for cycle_length, cycles in enumerate(nonzero_cycles, 2):
+        print(f'Found {len(cycles)} cycles of length {cycle_length}.')
+
+    if args.output is not None:
+        print(f'Dumping cycles to {args.output.name}.')
+        with args.output as output_file:
+            for cycle_length, cycles in enumerate(nonzero_cycles, 2):
+                output_file.write(f'Cycles of length {cycle_length}:\n')
+                cycle_texts = []
+                for cycle in cycles:
+                    cycle_texts.append(' > '.join(cycle_node.name
+                                                  for cycle_node in cycle))
+                output_file.write('\n'.join(sorted(cycle_texts)))
+                output_file.write('\n')
+
+
+# Run the command-line tool only when this file is executed as a script, not
+# when it is imported as a module.
+if __name__ == '__main__':
+    main()
--- a/tools/android/dependency_analysis/count_cycles_unittest.py
+++ b/tools/android/dependency_analysis/count_cycles_unittest.py
+#!/usr/bin/env python3
+# Copyright 2020 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Unit tests for dependency_analysis.count_cycles."""
+
+import itertools
+import unittest
+
+import count_cycles
+import graph
+
+
+class TestFindCycles(unittest.TestCase):
+    """Unit tests for find_cycles."""
+    KEY_0 = '0'
+    KEY_1 = '1'
+    KEY_2 = '2'
+    KEY_3 = '3'
+    MAX_CYCLE_LENGTH = 10
+
+    def test_no_self_cycles(self):
+        """Tests that self-cycles are not considered.
+
+        0 <---+
+        ^     |
+        |     v
+        +---> 1 (plus, 0 and 1 have self-cycles)
+        (one cycle, 010)
+        """
+        test_graph = graph.Graph()
+        test_graph.add_edge_if_new(self.KEY_0, self.KEY_1)
+        test_graph.add_edge_if_new(self.KEY_1, self.KEY_0)
+        test_graph.add_edge_if_new(self.KEY_0, self.KEY_0)
+        test_graph.add_edge_if_new(self.KEY_1, self.KEY_1)
+
+        res = count_cycles.find_cycles(test_graph, self.MAX_CYCLE_LENGTH)
+        expected_cycles = {
+            2: 1,
+        }
+        for cycle_length, cycles in enumerate(res):
+            self.assertEqual(len(cycles), expected_cycles.get(cycle_length, 0))
+
+    def test_big_cycle(self):
+        """Tests using a graph with one big cycle.
+
+        0 -> 1
+        ^    |
+        |    v
+        3 <- 2
+        (one cycle, 01230)
+        """
+        test_graph = graph.Graph()
+        test_graph.add_edge_if_new(self.KEY_0, self.KEY_1)
+        test_graph.add_edge_if_new(self.KEY_1, self.KEY_2)
+        test_graph.add_edge_if_new(self.KEY_2, self.KEY_3)
+        test_graph.add_edge_if_new(self.KEY_3, self.KEY_0)
+
+        res = count_cycles.find_cycles(test_graph, self.MAX_CYCLE_LENGTH)
+        expected_cycles = {
+            4: 1,
+        }
+        for cycle_length, cycles in enumerate(res):
+            self.assertEqual(len(cycles), expected_cycles.get(cycle_length, 0))
+
+    def test_multiple_cycles(self):
+        """Tests using a graph with multiple cycles.
+
+        0 -> 1
+        ^    ^
+        |    v
+        +--- 2 -> 3
+        (two cycles, 0120 and 121)
+        """
+        test_graph = graph.Graph()
+        test_graph.add_edge_if_new(self.KEY_0, self.KEY_1)
+        test_graph.add_edge_if_new(self.KEY_1, self.KEY_2)
+        test_graph.add_edge_if_new(self.KEY_2, self.KEY_0)
+        test_graph.add_edge_if_new(self.KEY_2, self.KEY_1)
+        test_graph.add_edge_if_new(self.KEY_2, self.KEY_3)
+
+        res = count_cycles.find_cycles(test_graph, self.MAX_CYCLE_LENGTH)
+        expected_cycles = {
+            2: 1,
+            3: 1,
+        }
+        for cycle_length, cycles in enumerate(res):
+            self.assertEqual(len(cycles), expected_cycles.get(cycle_length, 0))
+
+    def test_complete_graph(self):
+        """Tests using a complete graph on 4 nodes.
+
+        +------------+
+        v            |
+        0 <> 1 <--+  |
+        ^    ^    |  |
+        |    v    v  |
+        +--> 2 <> 3 <+
+        (20 cycles,
+        010, 020, 030, 121, 131, 232,
+        0120, 0130, 0210, 0230, 0310, 0320, 1231, 1321,
+        01230, 01320, 02130, 02310, 03120, 03210)
+        """
+        test_graph = graph.Graph()
+        for ka, kb in itertools.permutations(
+            [self.KEY_0, self.KEY_1, self.KEY_2, self.KEY_3], 2):
+            test_graph.add_edge_if_new(ka, kb)
+
+        res = count_cycles.find_cycles(test_graph, self.MAX_CYCLE_LENGTH)
+        expected_cycles = {2: 6, 3: 8, 4: 6}
+        for cycle_length, cycles in enumerate(res):
+            self.assertEqual(len(cycles), expected_cycles.get(cycle_length, 0))
+
+    def test_complete_graph_restricted_length(self):
+        """Tests using a complete graph on 4 nodes with maximum cycle length 2.
+
+        +------------+
+        v            |
+        0 <> 1 <--+  |
+        ^    ^    |  |
+        |    v    v  |
+        +--> 2 <> 3 <+
+        (6 cycles, 010, 020, 030, 121, 131, 232)
+        """
+        test_graph = graph.Graph()
+        for ka, kb in itertools.permutations(
+            [self.KEY_0, self.KEY_1, self.KEY_2, self.KEY_3], 2):
+            test_graph.add_edge_if_new(ka, kb)
+
+        res = count_cycles.find_cycles(test_graph, 2)
+        expected_cycles = {2: 6}
+        for cycle_length, cycles in enumerate(res):
+            self.assertEqual(len(cycles), expected_cycles.get(cycle_length, 0))
--- a/tools/android/dependency_analysis/graph.py
+++ b/tools/android/dependency_analysis/graph.py
@@ -3,6 +3,7 @@
 # found in the LICENSE file.
 """Utility classes (and functions, in the future) for graph operations."""

+import functools
 from typing import Dict, List, Optional, Tuple


@@ -18,6 +19,7 @@ def sorted_edges_by_name(edges):
    return sorted(edges, key=lambda edge: (edge[0].name, edge[1].name))


+@functools.total_ordering
 class Node(object):  # pylint: disable=useless-object-inheritance
    """A node/vertex in a directed graph."""
    def __init__(self, unique_key: str):
@@ -33,6 +35,9 @@ class Node(object):  # pylint: disable=useless-object-inheritance
    def __eq__(self, other: 'Node'):  # pylint: disable=missing-function-docstring
        return self._unique_key == other._unique_key

+    def __lt__(self, other: 'Node'):  # pylint: disable=missing-function-docstring
+        # Nodes are ordered by their unique key. Together with __eq__, the
+        # @functools.total_ordering decorator on the class uses this method
+        # to supply the remaining ordering comparisons.
+        return self._unique_key < other._unique_key
+
    def __hash__(self):  # pylint: disable=missing-function-docstring
        return hash(self._unique_key)