Commit 97ee7828 authored by James Long, committed by Commit Bot

Serialize jdeps class/package dependency graphs to JSON

In-memory representations of the two types of graphs were made
serializable/deserializable to and from JSON. A new module
`serialization` has been created to help with this process.

The end result is one .txt file containing the JSON dump of both the
class and package dependency graphs - more rationale on this format is
included in the docstring of serialization functions.

The performance of the de/serialization is very acceptable (within
a second on my machine). The filesize of the output JSON file is ~7MB,
which I also find acceptable. There is room for optimization in the
JSON structure if the filesize becomes an issue in the future.

Bug: 1081889
Change-Id: I03061372e0ed94aed62de58f41fb3f363f543b3f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2219817
Commit-Queue: James Long <yjlong@google.com>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Mohamed Heikal <mheikal@chromium.org>
Cr-Commit-Position: refs/heads/master@{#773357}
parent a7a42ffc
...@@ -7,6 +7,7 @@ import re ...@@ -7,6 +7,7 @@ import re
from typing import Tuple from typing import Tuple
import graph import graph
import class_json_consts
# Matches w/o parens: (some.package.name).(class)$($optional$nested$class) # Matches w/o parens: (some.package.name).(class)$($optional$nested$class)
JAVA_CLASS_FULL_NAME_REGEX = re.compile( JAVA_CLASS_FULL_NAME_REGEX = re.compile(
...@@ -69,9 +70,30 @@ class JavaClass(graph.Node): ...@@ -69,9 +70,30 @@ class JavaClass(graph.Node):
"""A set of nested classes contained within this class.""" """A set of nested classes contained within this class."""
return self._nested_classes return self._nested_classes
@nested_classes.setter
def nested_classes(self, other):
    """Replaces the set of nested classes contained within this class."""
    self._nested_classes = other
def add_nested_class(self, nested: str): # pylint: disable=missing-function-docstring def add_nested_class(self, nested: str): # pylint: disable=missing-function-docstring
self._nested_classes.add(nested) self._nested_classes.add(nested)
def get_node_metadata(self):
    """Builds the JSON metadata dict describing this class node.

    The nested classes are emitted as a sorted list so that the output is
    deterministic, which helps with testing.

    Structure:
    {
        'package': str,
        'class': str,
        'nested_classes': [ class_key, ... ],
    }
    """
    metadata = {}
    metadata[class_json_consts.PACKAGE] = self.package
    metadata[class_json_consts.CLASS] = self.class_name
    metadata[class_json_consts.NESTED_CLASSES] = sorted(self.nested_classes)
    return metadata
class JavaClassDependencyGraph(graph.Graph): class JavaClassDependencyGraph(graph.Graph):
"""A graph representation of the dependencies between Java classes. """A graph representation of the dependencies between Java classes.
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
"""Unit tests for dependency_analysis.class_dependency.""" """Unit tests for dependency_analysis.class_dependency."""
import unittest.mock import unittest.mock
import class_dependency import class_dependency
...@@ -102,8 +103,8 @@ class TestJavaClassDependencyGraph(unittest.TestCase): ...@@ -102,8 +103,8 @@ class TestJavaClassDependencyGraph(unittest.TestCase):
"""Tests that a jdeps name is correctly parsed into package + class.""" """Tests that a jdeps name is correctly parsed into package + class."""
created_node = self.test_graph.create_node_from_key( created_node = self.test_graph.create_node_from_key(
'package.class$nested') 'package.class$nested')
self.assertEqual(created_node._package, 'package') self.assertEqual(created_node.package, 'package')
self.assertEqual(created_node._class_name, 'class') self.assertEqual(created_node.class_name, 'class')
self.assertEqual(created_node.name, 'package.class') self.assertEqual(created_node.name, 'package.class')
def test_add_nested_class_to_key(self): def test_add_nested_class_to_key(self):
......
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Various class dependency constants used in de/serialization."""
# Node-specific constants. These are the keys of the metadata dict that
# JavaClass.get_node_metadata() generates for a class node.
PACKAGE = 'package' # The package component of a class's full name.
CLASS = 'class' # The class component of a class's full name.
NESTED_CLASSES = 'nested_classes' # Nested classes of a class.
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# found in the LICENSE file. # found in the LICENSE file.
"""Utility classes (and functions, in the future) for graph operations.""" """Utility classes (and functions, in the future) for graph operations."""
from typing import List, Tuple from typing import Dict, List, Optional, Tuple
def sorted_nodes_by_name(nodes): def sorted_nodes_by_name(nodes):
...@@ -59,6 +59,12 @@ class Node(object): # pylint: disable=useless-object-inheritance ...@@ -59,6 +59,12 @@ class Node(object): # pylint: disable=useless-object-inheritance
"""Creates an edge from the provided node to the current node.""" """Creates an edge from the provided node to the current node."""
self._inbound.add(node) self._inbound.add(node)
def get_node_metadata(self) -> Optional[Dict]:
    """Generates JSON metadata for the current node.

    This base implementation has no metadata to offer and returns None.
    If the returned dict is None, the metadata field will be excluded.
    """
    return None
class Graph(object): # pylint: disable=useless-object-inheritance class Graph(object): # pylint: disable=useless-object-inheritance
"""A directed graph data structure. """A directed graph data structure.
...@@ -142,3 +148,9 @@ class Graph(object): # pylint: disable=useless-object-inheritance ...@@ -142,3 +148,9 @@ class Graph(object): # pylint: disable=useless-object-inheritance
self._edges.append((src_node, dest_node)) self._edges.append((src_node, dest_node))
return True return True
return False return False
def get_edge_metadata(self, begin_node, end_node) -> Optional[Dict]:
    """Generates JSON metadata for the edge from begin_node to end_node.

    This base implementation has no metadata to offer and returns None.
    If the returned dict is None, the metadata field will be excluded.
    """
    return None
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Various constants used in de/serialization."""
# Node-specific constants.
NODES = 'nodes'  # Key of the list of serialized nodes of a graph.
NAME = 'name'  # Key of a serialized node's name.
# Edge-specific constants.
EDGES = 'edges'  # Key of the list of serialized edges of a graph.
BEGIN = 'begin'  # Key of the name of the node an edge begins at.
END = 'end'  # Key of the name of the node an edge ends at.
# Miscellaneous attributes.
PACKAGE_GRAPH = 'package_graph'  # Key of the serialized package graph.
CLASS_GRAPH = 'class_graph'  # Key of the serialized class graph.
META = 'meta'  # Key of the optional metadata dict of a node or an edge.
...@@ -8,6 +8,7 @@ from typing import Set, Tuple ...@@ -8,6 +8,7 @@ from typing import Set, Tuple
import class_dependency import class_dependency
import graph import graph
import package_json_consts
class JavaPackage(graph.Node): class JavaPackage(graph.Node):
...@@ -75,6 +76,19 @@ class JavaPackage(graph.Node): ...@@ -75,6 +76,19 @@ class JavaPackage(graph.Node):
""" """
return self._class_dependency_edges[end_node] return self._class_dependency_edges[end_node]
def get_node_metadata(self):
    """Builds the JSON metadata dict describing this package node.

    The class keys are emitted as a sorted list so that the output is
    deterministic, which helps with testing.

    Structure:
    {
        'classes': [ class_key, ... ],
    }
    """
    metadata = {}
    metadata[package_json_consts.CLASSES] = sorted(self.classes.keys())
    return metadata
class JavaPackageDependencyGraph(graph.Graph): class JavaPackageDependencyGraph(graph.Graph):
"""A graph representation of the dependencies between Java packages. """A graph representation of the dependencies between Java packages.
...@@ -107,6 +121,24 @@ class JavaPackageDependencyGraph(graph.Graph): ...@@ -107,6 +121,24 @@ class JavaPackageDependencyGraph(graph.Graph):
begin_package_node.add_class_dependency_edge( begin_package_node.add_class_dependency_edge(
end_package_node, begin_class, end_class) end_package_node, begin_class, end_class)
def create_node_from_key(self, package_name: str): def create_node_from_key(self, key: str):
"""Create a JavaPackage node from the given package name.""" """Create a JavaPackage node from the given key (package name)."""
return JavaPackage(package_name) return JavaPackage(key)
def get_edge_metadata(self, begin_node, end_node):
    """Builds the JSON metadata dict for the edge begin_node -> end_node.

    Each class edge comprising the package edge is emitted as a
    [begin_key, end_key] pair, and the pairs are sorted so that the
    output is deterministic, which helps with testing.

    Structure:
    {
        'class_edges': [
            [begin_key, end_key], ...
        ],
    }
    """
    class_edges = begin_node.get_class_dependencies_in_outbound_edge(
        end_node)
    class_edge_names = [[begin.name, end.name] for begin, end in class_edges]
    class_edge_names.sort()
    return {package_json_consts.CLASS_EDGES: class_edge_names}
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
import unittest.mock import unittest.mock
import graph
import package_dependency import package_dependency
......
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Various package dependency constants used in de/serialization."""
# Node-specific constants.
CLASSES = 'classes' # Internal classes of a package.
# Edge-specific constants.
CLASS_EDGES = 'class_edges' # The class edges comprising a package edge.
...@@ -9,6 +9,7 @@ import subprocess ...@@ -9,6 +9,7 @@ import subprocess
import class_dependency import class_dependency
import package_dependency import package_dependency
import serialization
SRC_PATH = pathlib.Path(__file__).resolve().parents[3] # src/ SRC_PATH = pathlib.Path(__file__).resolve().parents[3] # src/
JDEPS_PATH = SRC_PATH.joinpath('third_party/jdk/current/bin/jdeps') JDEPS_PATH = SRC_PATH.joinpath('third_party/jdk/current/bin/jdeps')
...@@ -111,6 +112,16 @@ def main(): ...@@ -111,6 +112,16 @@ def main():
f"got {package_graph.num_nodes} nodes " f"got {package_graph.num_nodes} nodes "
f"and {package_graph.num_edges} edges.") f"and {package_graph.num_edges} edges.")
print('Dumping JSON representation to testing file '
'dependency_analysis/testfile.txt.')
testing_filepath = (f'{pathlib.Path(__file__).parent.absolute()}'
'/testfile.txt')
serialization.dump_class_and_package_graphs_to_file(
class_graph, package_graph, testing_filepath)
print('Recreating graph from the dumped JSON representation.')
serialization.load_class_and_package_graphs_from_file(testing_filepath)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Helper module for the de/serialization of graphs to/from files."""
import json
from typing import Dict, Tuple
import class_dependency
import class_json_consts
import graph
import json_consts
import package_dependency
def create_json_obj_from_node(node: graph.Node) -> Dict:
    """Builds the JSON representation of a single node.

    The 'meta' entry is only present when `node.get_node_metadata()`
    returns something other than None.

    Structure:
    {
        'name': str,
        'meta': { see Node.get_node_metadata },
    }
    """
    serialized = {json_consts.NAME: node.name}
    metadata = node.get_node_metadata()
    if metadata is None:
        return serialized
    serialized[json_consts.META] = metadata
    return serialized
def create_json_obj_from_graph(graph_obj: graph.Graph) -> Dict:
    """Builds the JSON representation of a whole graph.

    Nodes and edges are emitted sorted by name so that the output is
    deterministic, which helps with testing. An edge's 'meta' entry is
    only present when `graph_obj.get_edge_metadata()` returns something
    other than None.

    Structure:
    {
        'nodes': [
            { see create_json_obj_from_node }, ...
        ],
        'edges': [
            {
                'begin': str,
                'end': str,
                'meta': { see Graph.get_edge_metadata },
            }, ...
        ],
    }
    """
    def serialize_edge(src, dest):
        serialized = {
            json_consts.BEGIN: src.name,
            json_consts.END: dest.name,
        }
        metadata = graph_obj.get_edge_metadata(src, dest)
        if metadata is not None:
            serialized[json_consts.META] = metadata
        return serialized

    json_nodes = [
        create_json_obj_from_node(node)
        for node in graph.sorted_nodes_by_name(graph_obj.nodes)
    ]
    json_edges = [
        serialize_edge(src, dest)
        for src, dest in graph.sorted_edges_by_name(graph_obj.edges)
    ]
    return {json_consts.NODES: json_nodes, json_consts.EDGES: json_edges}
def create_class_graph_from_json_obj(
        json_obj: Dict) -> class_dependency.JavaClassDependencyGraph:
    """Rebuilds a JavaClassDependencyGraph from its JSON representation.

    From each serialized node only the name and the nested classes are
    read; from each serialized edge only the begin and end keys are read.
    """
    result = class_dependency.JavaClassDependencyGraph()

    for serialized_node in json_obj[json_consts.NODES]:
        key = serialized_node[json_consts.NAME]
        node_meta = serialized_node[json_consts.META]
        nested_keys = node_meta[class_json_consts.NESTED_CLASSES]
        node = result.add_node_if_new(key)
        # The JSON holds a (sorted) list; the in-memory node holds a set.
        node.nested_classes = set(nested_keys)

    for serialized_edge in json_obj[json_consts.EDGES]:
        src_key = serialized_edge[json_consts.BEGIN]
        dest_key = serialized_edge[json_consts.END]
        result.add_edge_if_new(src_key, dest_key)

    return result
def dump_class_and_package_graphs_to_file(
        class_graph: class_dependency.JavaClassDependencyGraph,
        package_graph: package_dependency.JavaPackageDependencyGraph,
        filename: str):
    """Dumps a JSON representation of the class + package graph to a file.

    We dump both graphs together because the package graph in-memory holds
    references to class nodes (for storing class edges comprising
    a package edge), and hence the class graph is needed to recreate the
    package graph. Since our use cases always want the package graph over the
    class graph, there is currently no point in dumping the class graph
    separately.

    Args:
        class_graph: The class dependency graph to serialize.
        package_graph: The package dependency graph to serialize.
        filename: Path of the file to write. It is opened in 'w' mode, so
            any existing content is replaced.

    Structure:
    {
        'class_graph': { see create_json_obj_from_graph },
        'package_graph': { see create_json_obj_from_graph },
    }
    """
    json_obj = {
        json_consts.CLASS_GRAPH: create_json_obj_from_graph(class_graph),
        json_consts.PACKAGE_GRAPH: create_json_obj_from_graph(package_graph),
    }
    # An explicit encoding keeps the output independent of the locale's
    # default. The compact separators drop the whitespace json.dump would
    # otherwise emit after ',' and ':'.
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(json_obj, json_file, separators=(',', ':'))
def load_class_graph_from_file(
        filename: str) -> class_dependency.JavaClassDependencyGraph:
    """Recreates a JavaClassDependencyGraph from a JSON file.

    The file is expected to be in the format dumped by
    `dump_class_and_package_graphs_to_file`. Only the 'class_graph' entry
    of the file is used.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The class dependency graph rebuilt from the file's class graph.
    """
    # An explicit encoding keeps the read independent of the locale's
    # default.
    with open(filename, 'r', encoding='utf-8') as json_file:
        json_obj = json.load(json_file)
    class_graph_json_obj = json_obj[json_consts.CLASS_GRAPH]
    return create_class_graph_from_json_obj(class_graph_json_obj)
def load_class_and_package_graphs_from_file(
    filename: str
) -> Tuple[class_dependency.JavaClassDependencyGraph,
           package_dependency.JavaPackageDependencyGraph]:
    """Recreates the class graph and the package graph from a JSON file.

    The file is expected to be in the format dumped by
    `dump_class_and_package_graphs_to_file`.

    The package graph is built from the deserialized class graph; the
    serialized package graph in the file is not read at all. This aligns
    with how the package graph is constructed when using jdeps. The
    serialized package graph is still written for other consumers of the
    JSON (eg. JS-side) which may want to bypass the costly conversion from
    class to package graph.

    Returns:
        A (class_graph, package_graph) tuple.
    """
    deserialized_class_graph = load_class_graph_from_file(filename)
    return (deserialized_class_graph,
            package_dependency.JavaPackageDependencyGraph(
                deserialized_class_graph))
#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Unit tests for dependency_analysis.serialization."""
import unittest.mock
import class_dependency
import class_json_consts
import graph
import json_consts
import package_dependency
import package_json_consts
import serialization
class TestSerialization(unittest.TestCase):
    """Unit tests for various de/serialization functions.

    All tests share one small fixture: three classes in two packages
    (p1.c1, p1.c2, p2.c3), three class edges between them, and a few nested
    classes. JSON_CLASS_GRAPH and JSON_PACKAGE_GRAPH are the expected
    serialized forms of that fixture.
    """
    CLASS_1 = 'p1.c1'
    CLASS_2 = 'p1.c2'
    CLASS_3 = 'p2.c3'
    CLASS_1_NESTED_1 = 'abc'
    CLASS_1_NESTED_2 = 'def'
    CLASS_2_NESTED_1 = 'ghi'

    # The lists in the following JSON are sorted,
    # since we sort lists when serializing (for easier testing).
    JSON_CLASS_GRAPH = {
        json_consts.NODES: [
            {
                json_consts.NAME: CLASS_1,
                json_consts.META: {
                    class_json_consts.PACKAGE:
                    'p1',
                    class_json_consts.CLASS:
                    'c1',
                    class_json_consts.NESTED_CLASSES:
                    [CLASS_1_NESTED_1, CLASS_1_NESTED_2],
                },
            },
            {
                json_consts.NAME: CLASS_2,
                json_consts.META: {
                    class_json_consts.PACKAGE: 'p1',
                    class_json_consts.CLASS: 'c2',
                    class_json_consts.NESTED_CLASSES: [CLASS_2_NESTED_1],
                },
            },
            {
                json_consts.NAME: CLASS_3,
                json_consts.META: {
                    class_json_consts.PACKAGE: 'p2',
                    class_json_consts.CLASS: 'c3',
                    class_json_consts.NESTED_CLASSES: [],
                },
            },
        ],
        json_consts.EDGES: [
            {
                json_consts.BEGIN: CLASS_1,
                json_consts.END: CLASS_2,
            },
            {
                json_consts.BEGIN: CLASS_1,
                json_consts.END: CLASS_3,
            },
            {
                json_consts.BEGIN: CLASS_2,
                json_consts.END: CLASS_3,
            },
        ],
    }

    JSON_PACKAGE_GRAPH = {
        json_consts.NODES: [
            {
                json_consts.NAME: 'p1',
                json_consts.META: {
                    package_json_consts.CLASSES: [CLASS_1, CLASS_2],
                },
            },
            {
                json_consts.NAME: 'p2',
                json_consts.META: {
                    package_json_consts.CLASSES: [CLASS_3],
                },
            },
        ],
        json_consts.EDGES: [
            {
                json_consts.BEGIN: 'p1',
                json_consts.END: 'p1',
                json_consts.META: {
                    package_json_consts.CLASS_EDGES: [
                        [CLASS_1, CLASS_2],
                    ],
                },
            },
            {
                json_consts.BEGIN: 'p1',
                json_consts.END: 'p2',
                json_consts.META: {
                    package_json_consts.CLASS_EDGES: [
                        [CLASS_1, CLASS_3],
                        [CLASS_2, CLASS_3],
                    ],
                },
            },
        ],
    }

    def _build_class_graph(self):
        """Builds the in-memory class graph matching JSON_CLASS_GRAPH."""
        class_graph = class_dependency.JavaClassDependencyGraph()
        class_graph.add_edge_if_new(self.CLASS_1, self.CLASS_2)
        class_graph.add_edge_if_new(self.CLASS_1, self.CLASS_3)
        class_graph.add_edge_if_new(self.CLASS_2, self.CLASS_3)
        class_graph.add_nested_class_to_key(self.CLASS_1,
                                            self.CLASS_1_NESTED_1)
        class_graph.add_nested_class_to_key(self.CLASS_1,
                                            self.CLASS_1_NESTED_2)
        class_graph.add_nested_class_to_key(self.CLASS_2,
                                            self.CLASS_2_NESTED_1)
        return class_graph

    def test_class_serialization(self):
        """Tests JSON serialization of a class dependency graph."""
        test_graph = self._build_class_graph()

        test_json_obj = serialization.create_json_obj_from_graph(test_graph)

        self.assertEqual(test_json_obj, self.JSON_CLASS_GRAPH)

    def test_package_serialization(self):
        """Tests JSON serialization of a package dependency graph."""
        package_graph = package_dependency.JavaPackageDependencyGraph(
            self._build_class_graph())

        test_json_obj = serialization.create_json_obj_from_graph(package_graph)

        self.assertEqual(test_json_obj, self.JSON_PACKAGE_GRAPH)

    def test_class_deserialization(self):
        """Tests JSON deserialization of a class dependency graph.

        Since we only ever construct package graphs from class graphs
        (and that feature is tested elsewhere), we do not need to test
        deserialization of package dependency graphs as well.
        """
        test_graph = serialization.create_class_graph_from_json_obj(
            self.JSON_CLASS_GRAPH)

        node_1 = test_graph.get_node_by_key(self.CLASS_1)
        node_2 = test_graph.get_node_by_key(self.CLASS_2)
        node_3 = test_graph.get_node_by_key(self.CLASS_3)

        self.assertIsNotNone(node_1)
        self.assertIsNotNone(node_2)
        self.assertIsNotNone(node_3)
        self.assertEqual(node_1.nested_classes,
                         {self.CLASS_1_NESTED_1, self.CLASS_1_NESTED_2})
        self.assertEqual(node_2.nested_classes, {self.CLASS_2_NESTED_1})
        # An empty 'nested_classes' list must deserialize to an empty set.
        self.assertEqual(node_3.nested_classes, set())
        self.assertEqual(
            graph.sorted_edges_by_name(test_graph.edges),
            graph.sorted_edges_by_name([(node_1, node_2), (node_1, node_3),
                                        (node_2, node_3)]))
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment