Commit 312b6bf9, authored by Sam Bowen, committed by Commit Bot

Script to generate entity/property names from Schema.org schema file

The C++ code generated will be used in Blink's
document_metadata_extractor.

Bug: 1044250
Change-Id: I49db529fa95bf6868eabeb6f0718476fd904898a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2029010
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#737473}
parent ff5cf862
[style]
based_on_style = pep8
beccahughes@chromium.org
steimel@chromium.org
sgbowen@google.com
# COMPONENT: Blink>DocumentMetadata
# TEAM: media-dev@chromium.org
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Chromium presubmit for components/schema_org/generate_schema_org_code."""
def _RunMakeGenerateSchemaOrgCodeTests(input_api, output_api):
    """Run the generator's unit tests when the generator or its tests changed.

    Returns an empty list when neither watched file appears in
    input_api.LocalPaths(); otherwise returns whatever input_api.RunTests()
    produces for the unit test command, whose message type is
    output_api.PresubmitPromptWarning.
    """
    watched_paths = (
        'components/schema_org/generate_schema_org_code.py',
        'components/schema_org/generate_schema_org_code_unittest.py',
    )
    for watched_path in watched_paths:
        if watched_path in input_api.LocalPaths():
            break
    else:
        # Neither the script nor its test is part of this change.
        return []

    # The unit test sits in the same directory as this presubmit file.
    unittest_script = input_api.os_path.join(
        input_api.PresubmitLocalPath(),
        'generate_schema_org_code_unittest.py')
    argv = [input_api.python_executable, unittest_script]
    unittest_command = input_api.Command(
        name='generate_schema_org_code_unittest',
        cmd=argv,
        kwargs={},
        message=output_api.PresubmitPromptWarning)
    return input_api.RunTests([unittest_command])
def CheckChangeOnUpload(input_api, output_api):
    """Upload-time entry point: run the generate_schema_org_code tests."""
    results = _RunMakeGenerateSchemaOrgCodeTests(input_api, output_api)
    return results
def CheckChangeOnCommit(input_api, output_api):
    """Commit-time entry point: run the generate_schema_org_code tests."""
    results = _RunMakeGenerateSchemaOrgCodeTests(input_api, output_api)
    return results
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates C++ code representing structured data objects from schema.org
This script generates C++ objects based on a JSON-LD schema file. Blink uses the
generated code to scrape schema.org data from web pages.
"""
import argparse
import json
import sys
import os
# Directory containing this script, with symlinks resolved by realpath().
_current_dir = os.path.dirname(os.path.realpath(__file__))
# jinja2 is in chromium's third_party directory
# Insert at front to override system libraries, and after path[0] == script dir
# The joined path is <script dir>/../../third_party.
sys.path.insert(
    1, os.path.join(_current_dir, *([os.pardir] * 2 + ['third_party'])))
# These imports must stay below the sys.path.insert() call above so that the
# third_party copy of jinja2 is the one found.
import jinja2
from jinja2 import Environment, PackageLoader, select_autoescape
# Module-level template environment used by generate_file(). Templates are
# looked up through a PackageLoader for the 'generate_schema_org_code' package
# with an empty package path. select_autoescape is imported above but is not
# passed here, so no autoescape option is set on this environment.
env = Environment(loader=PackageLoader('generate_schema_org_code', ''))
def object_name_from_id(schema_org_id):
    """Get the object name from a schema.org ID.

    Strips the schema.org namespace from an '@id' value, e.g.
    'http://schema.org/MediaObject' -> 'MediaObject'. Both the 'http' and
    'https' forms of the namespace are recognised.

    Args:
      schema_org_id: The '@id' string of a node in the schema graph.

    Returns:
      The part of the ID after the schema.org namespace prefix. An ID that
      does not start with a recognised prefix is returned unchanged rather
      than being truncated at an arbitrary offset.
    """
    # Previously a fixed number of characters was sliced off without checking
    # the prefix, which turned 'https://schema.org/Thing' into '/Thing'.
    for prefix in ('http://schema.org/', 'https://schema.org/'):
        if schema_org_id.startswith(prefix):
            return schema_org_id[len(prefix):]
    return schema_org_id
def get_template_vars(schema_file_path):
    """Collect the entity and property names declared in a schema file.

    Args:
      schema_file_path: Path of a JSON file whose top-level '@graph' key
        holds a list of nodes, each with '@type' and '@id' keys.

    Returns:
      A dict with two sorted lists of names: 'entities' (nodes whose '@type'
      is 'rdfs:Class') and 'properties' (nodes whose '@type' is
      'rdf:Property'). Nodes of any other type are ignored.
    """
    with open(schema_file_path) as schema_file:
        graph = json.load(schema_file)['@graph']

    entity_names = []
    property_names = []
    for node in graph:
        node_type = node['@type']
        if node_type == 'rdfs:Class':
            entity_names.append(object_name_from_id(node['@id']))
        elif node_type == 'rdf:Property':
            property_names.append(object_name_from_id(node['@id']))

    return {
        'entities': sorted(entity_names),
        'properties': sorted(property_names),
    }
def generate_file(file_name, template_file, template_vars):
    """Generate and write file given a template and variables to render.

    Args:
      file_name: Path of the output file to (over)write.
      template_file: Name of the template to render, as understood by the
        module-level jinja2 environment, e.g. 'schema_org_names.h.tmpl'.
      template_vars: Dict of variables passed to the template. It is updated
        in place with two derived entries:
          'header_file': the template's base name up to its first '.', and
          'header_guard': that name upper-cased with '_H' appended.
    """
    # The stem used to be computed as template_file[template_file.index('.')],
    # which indexes a single character and therefore always produced '.'.
    # Taking the basename first also keeps dots in directory components
    # (e.g. '../templates/x.h.tmpl') from being mistaken for the extension.
    header_file = os.path.basename(template_file).split('.', 1)[0]
    template_vars['header_file'] = header_file
    template_vars['header_guard'] = header_file.upper() + '_H'

    # Render before opening the output so that a template error does not
    # leave an empty or truncated file behind.
    rendered = env.get_template(template_file).render(template_vars)
    with open(file_name, 'w') as f:
        f.write(rendered)
def main():
    """Parse the command line and render every requested template.

    Flags (all required):
      --schema-file: schema file read by get_template_vars().
      --output-dir: directory that receives the generated files.
      --templates: one or more template files. Each one is rendered to a
        file in the output directory named after the template's base name
        with '.tmpl' removed.

    Returns:
      None, which the entry guard below hands to sys.exit() (exit status 0).
    """
    parser = argparse.ArgumentParser()
    # The flags are marked required so that omitting one yields an argparse
    # usage error instead of a TypeError traceback from a None value later on.
    parser.add_argument(
        '--schema-file',
        required=True,
        help='Schema.org schema file to use for code generation.')
    parser.add_argument(
        '--output-dir',
        required=True,
        help='Output directory in which to place generated code files.')
    parser.add_argument(
        '--templates',
        nargs='+',
        required=True,
        help='Template files to render into the output directory.')
    args = parser.parse_args()

    template_vars = get_template_vars(args.schema_file)
    for template_file in args.templates:
        output_name = os.path.basename(template_file.replace('.tmpl', ''))
        generate_file(
            os.path.join(args.output_dir, output_name), template_file,
            template_vars)


if __name__ == '__main__':
    sys.exit(main())
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Tests for generate_schema_org_code."""
import sys
import unittest
import generate_schema_org_code
import os
# Two directories above the one containing this file; presumably the Chromium
# source root, given that this test lives in components/schema_org.
SRC = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)
# Make the `mock` package under third_party/pymock importable.
sys.path.append(os.path.join(SRC, 'third_party', 'pymock'))
import mock
# Directory containing this test, with symlinks resolved by realpath().
_current_dir = os.path.dirname(os.path.realpath(__file__))
# jinja2 is in chromium's third_party directory
# Insert at front to override system libraries, and after path[0] == script dir
# The joined path is <test dir>/../../third_party.
sys.path.insert(
    1, os.path.join(_current_dir, *([os.pardir] * 2 + ['third_party'])))
import jinja2
class GenerateSchemaOrgCodeTest(unittest.TestCase):
    """Unit tests for the generate_schema_org_code module."""

    def test_get_template_vars(self):
        """Classes become entities and rdf properties become properties."""
        file_content = """
        {
          "@graph": [
            {
              "@id": "http://schema.org/MediaObject",
              "@type": "rdfs:Class"
            },
            {
              "@id": "http://schema.org/propertyName",
              "@type": "rdf:Property"
            }
          ]
        }
        """
        # Patch the `open` name as seen from the module under test instead of
        # '__builtin__.open': the '__builtin__' module only exists on
        # Python 2, whereas a module-level override (create=True adds the
        # attribute for the duration of the patch) works on both versions.
        with mock.patch.object(
                generate_schema_org_code,
                'open',
                mock.mock_open(read_data=file_content),
                create=True):
            # Any path works here because open() is mocked; a string is used
            # so the call looks like real usage rather than passing the mock.
            self.assertEqual(
                generate_schema_org_code.get_template_vars('schema.jsonld'), {
                    'entities': ['MediaObject'],
                    'properties': ['propertyName']
                })


if __name__ == '__main__':
    unittest.main()
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#include components/schema_org/{{ header_file }}.h
namespace schema_org {
namespace entity {
{% for entity in entities %}
char k{{entity}}[] = "{{entity}}";
{% endfor %}
} // entity
} // schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#ifndef SCHEMA_ORG_ENTITY_{{ header_guard | upper }}
#define SCHEMA_ORG_ENTITY_{{ header_guard | upper }}
#include <string>
namespace schema_org {
namespace entity {
// Declares one name constant per schema.org entity. Each constant is expected
// to be defined by the generated source file that includes this header.
{% for entity in entities %}
extern const char k{{entity}}[];
{% endfor %}
} // namespace entity
} // namespace schema_org
#endif // SCHEMA_ORG_ENTITY_{{ header_guard | upper }}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#include components/schema_org/{{ header_file }}.h
namespace schema_org {
namespace property {
{% for property in properties %}
char k{{property | capitalize}}[] = "{{property}}";
{% endfor %}
} // property
} // schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#ifndef SCHEMA_ORG_PROPERTY_{{ header_guard | upper }}
#define SCHEMA_ORG_PROPERTY_{{ header_guard | upper }}
#include <string>
namespace schema_org {
namespace property {
{% for property in properties %}
extern const char k{{property}}[];
{% endfor %}
} // namespace property
} // namespace schema_org
#endif // SCHEMA_ORG_PROPERTY_{{ header_guard | upper }}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment