Commit 2308d074 authored by meacer's avatar meacer Committed by Commit Bot

Support grdp files in translation screenshot presubmit

Translation screenshots project requires Chrome devs to upload UI
screenshots along with string changes in .grd and .grdp files. These
are XML files containing all user visible strings in Chrome.

Translation screenshots has a presubmit checks that extracts the
list of modified strings, looks for image files associated with these
strings and warns the developer to add them if they are missing.

This presubmit parses .grds to extract the list of modified strings but
it skips <part> tags which are used to reference .grdp files. As a
result, the presubmit currently ignores string changes in .grdp files.
.grdp files contain the majority of UI strings in Chrome, so the lack of
presubmit checks for them results in low translation screenshot coverage.

This CL changes how .grdp files are loaded in the presubmit: Previously,
they were written in a temporary directory alongside a fake .grd file
that referenced the loaded .grdp file. The code loaded the fake .grd file
which resulted in loading the strings in the actual .grdp file. However,
test mocks override all os.path methods which in turn breaks temporary
directory creation. As a result, it was not possible to properly test
the .grdp loading code, so we skipped <part> tags as a workaround.

The new loading code writes a temporary copy of the .grdp file and wraps
its contents with proper tags so that it can be loaded as a .grd file
instead. This avoids the need to create a temporary directory and is
fully testable.

The end result is that Translation Screenshots presubmit will now ask
Chrome devs to upload screenshots for changes in .grdp files. This should
significantly increase the coverage of strings with translation screenshots,
leading to better quality in string localizations.

Bug: 924652
Change-Id: Iae7933e8147cefdabc15ef21d2b6daa43d5002bc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1875554Reviewed-by: default avataranthonyvd <anthonyvd@chromium.org>
Reviewed-by: default avatarJochen Eisinger <jochen@chromium.org>
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#714550}
parent 4bc4f394
...@@ -4638,7 +4638,6 @@ def CheckChangeOnCommit(input_api, output_api): ...@@ -4638,7 +4638,6 @@ def CheckChangeOnCommit(input_api, output_api):
def _CheckTranslationScreenshots(input_api, output_api): def _CheckTranslationScreenshots(input_api, output_api):
PART_FILE_TAG = "part"
import os import os
import sys import sys
from io import StringIO from io import StringIO
...@@ -4646,47 +4645,11 @@ def _CheckTranslationScreenshots(input_api, output_api): ...@@ -4646,47 +4645,11 @@ def _CheckTranslationScreenshots(input_api, output_api):
try: try:
old_sys_path = sys.path old_sys_path = sys.path
sys.path = sys.path + [input_api.os_path.join( sys.path = sys.path + [input_api.os_path.join(
input_api.PresubmitLocalPath(), 'tools', 'grit')] input_api.PresubmitLocalPath(), 'tools', 'translation')]
import grit.grd_reader from helper import grd_helper
import grit.node.message
import grit.util
finally: finally:
sys.path = old_sys_path sys.path = old_sys_path
def _GetGrdMessages(grd_path_or_string, dir_path='.'):
"""Load the grd file and return a dict of message ids to messages.
Ignores any nested grdp files pointed by <part> tag.
"""
doc = grit.grd_reader.Parse(grd_path_or_string, dir_path,
stop_after=None, first_ids_file=None,
debug=False, defines={'_chromium': 1},
tags_to_ignore=set([PART_FILE_TAG]))
return {
msg.attrs['name']:msg for msg in doc.GetChildrenOfType(
grit.node.message.MessageNode)
}
def _GetGrdpMessagesFromString(grdp_string):
"""Parses the contents of a grdp file given in grdp_string.
grd_reader can't parse grdp files directly. Instead, this creates a
temporary directory with a grd file pointing to the grdp file, and loads the
grd from there. Any nested grdp files (pointed by <part> tag) are ignored.
"""
WRAPPER = """<?xml version="1.0" encoding="utf-8"?>
<grit latest_public_release="1" current_release="1">
<release seq="1">
<messages>
<part file="sub.grdp" />
</messages>
</release>
</grit>
"""
with grit.util.TempDir({'main.grd': WRAPPER,
'sub.grdp': grdp_string}) as temp_dir:
return _GetGrdMessages(temp_dir.GetPath('main.grd'), temp_dir.GetPath())
new_or_added_paths = set(f.LocalPath() new_or_added_paths = set(f.LocalPath()
for f in input_api.AffectedFiles() for f in input_api.AffectedFiles()
if (f.Action() == 'A' or f.Action() == 'M')) if (f.Action() == 'A' or f.Action() == 'M'))
...@@ -4747,18 +4710,18 @@ def _CheckTranslationScreenshots(input_api, output_api): ...@@ -4747,18 +4710,18 @@ def _CheckTranslationScreenshots(input_api, output_api):
new_id_to_msg_map = {} new_id_to_msg_map = {}
if file_path.endswith('.grdp'): if file_path.endswith('.grdp'):
if f.OldContents(): if f.OldContents():
old_id_to_msg_map = _GetGrdpMessagesFromString( old_id_to_msg_map = grd_helper.GetGrdpMessagesFromString(
unicode('\n'.join(f.OldContents()))) unicode('\n'.join(f.OldContents())))
if f.NewContents(): if f.NewContents():
new_id_to_msg_map = _GetGrdpMessagesFromString( new_id_to_msg_map = grd_helper.GetGrdpMessagesFromString(
unicode('\n'.join(f.NewContents()))) unicode('\n'.join(f.NewContents())))
else: else:
if f.OldContents(): if f.OldContents():
old_id_to_msg_map = _GetGrdMessages( old_id_to_msg_map = grd_helper.GetGrdMessages(
StringIO(unicode('\n'.join(f.OldContents())))) StringIO(unicode('\n'.join(f.OldContents()))), '.')
if f.NewContents(): if f.NewContents():
new_id_to_msg_map = _GetGrdMessages( new_id_to_msg_map = grd_helper.GetGrdMessages(
StringIO(unicode('\n'.join(f.NewContents())))) StringIO(unicode('\n'.join(f.NewContents()))), '.')
# Compute added, removed and modified message IDs. # Compute added, removed and modified message IDs.
old_ids = set(old_id_to_msg_map) old_ids = set(old_id_to_msg_map)
......
This diff is collapsed.
...@@ -24,3 +24,19 @@ repositories, it may contain `.png` files generated by Chrome developers, such a ...@@ -24,3 +24,19 @@ repositories, it may contain `.png` files generated by Chrome developers, such a
For more information, see [https://g.co/chrome/translation](https://g.co/chrome/translation). For more information, see [https://g.co/chrome/translation](https://g.co/chrome/translation).
# Run the tests
The presubmit automatically runs all files named `*_unittest.py `:
```
git cl presubmit --force
```
python
# Run sanity checks
This will attempt to load all .grd and .grdp files in the Chrome repo.
Run once before uploading the CL to ensure everything works.
```
python helper/sanity_check.py
```
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import tempfile
here = os.path.dirname(os.path.realpath(__file__))
repo_root = os.path.normpath(os.path.join(here, '..', '..', '..'))
try:
old_sys_path = sys.path
sys.path = sys.path + [os.path.join(repo_root, 'tools', 'grit')]
import grit.grd_reader
import grit.node.message
import grit.util
finally:
sys.path = old_sys_path
TAGS_TO_IGNORE = (
# <include> tags mostly point to resource files that don't contain UI
# strings.
'include',
# <structure> tags point to image files.
'structure')
def GetGrdMessages(grd_path_or_string, dir_path):
"""Load the grd file and return a dict of message ids to messages."""
doc = grit.grd_reader.Parse(
grd_path_or_string,
dir_path,
stop_after=None,
first_ids_file=None,
debug=False,
defines={'_chromium': 1},
tags_to_ignore=set(TAGS_TO_IGNORE))
return {
msg.attrs['name']: msg
for msg in doc.GetChildrenOfType(grit.node.message.MessageNode)
}
def GetGrdpMessagesFromString(grdp_string):
"""Parses the contents of the grdp file given in grdp_string.
grd_reader can't parse grdp files directly. Instead, this replaces grd
specific tags in the input string with grdp specific tags, writes the output
string in a temporary file and loads the grd from the temporary file.
This code previously created a temporary directory (using grit.util), wrote
a temporary grd file pointing to another temporary grdp file that contained
the input string. This was not testable since os.path.exists is overridden
in tests and grit.util uses mkdirs which in turn calls os.path.exists. This
new code works because it doesn't need to check the existence of a path.
It's expected that a grdp file will not refer to another grdp file via a
<part> tag. Currently, none of the grdp files in the repository does that.
"""
replaced_string = grdp_string.replace(
'<grit-part>',
"""<grit base_dir="." latest_public_release="1" current_release="1">
<release seq="1" allow_pseudo="false">
<messages fallback_to_english="true">
""")
replaced_string = replaced_string.replace(
'</grit-part>', """</messages>
</release>
</grit>""")
with tempfile.NamedTemporaryFile() as f:
f.write(replaced_string.encode('utf-8'))
f.flush()
f.seek(0)
return GetGrdMessages(f.name, os.path.dirname(f.name))
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Unit tests for grd_helper.py."""
import io
import os
import unittest
from .helper import grd_helper
here = os.path.dirname(os.path.realpath(__file__))
repo_root = os.path.normpath(os.path.join(here, '..', '..', '..'))
def read_file_as_text(path):
with io.open(path, mode='r', encoding='utf-8') as f:
return f.read()
class GrdHelperTest(unittest.TestCase):
def testReadGrdMessages(self):
grd_dir = os.path.join(repo_root, 'tools', 'translation', 'testdata')
messages = grd_helper.GetGrdMessages(
os.path.join(grd_dir, 'test.grd'), grd_dir)
# Result should contain all messages from test.grd and part.grdp.
self.assertTrue('IDS_TEST_STRING1' in messages)
self.assertTrue('IDS_TEST_STRING2' in messages)
self.assertTrue('IDS_TEST_STRING_NON_TRANSLATEABLE' in messages)
self.assertTrue('IDS_PART_STRING_NON_TRANSLATEABLE' in messages)
def testReadGrdpMessages(self):
messages = grd_helper.GetGrdpMessagesFromString(
read_file_as_text(
os.path.join(repo_root, 'tools', 'translation', 'testdata',
'part.grdp')))
self.assertTrue('IDS_PART_STRING1' in messages)
self.assertTrue('IDS_PART_STRING2' in messages)
print('DONE')
if __name__ == '__main__':
unittest.main()
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Sanity checking for grd_helper.py. Run manually before uploading a CL."""
from .helper import grd_helper
from .helper import translation_helper
import io
import os
import subprocess
import sys
if sys.platform.startswith('win'):
# Use the |git.bat| in the depot_tools/ on Windows.
GIT = 'git.bat'
else:
GIT = 'git'
here = os.path.dirname(os.path.realpath(__file__))
repo_root = os.path.normpath(os.path.join(here, '..', '..', '..'))
def list_files_in_repository(repo_path, pattern):
"""Lists all files matching given pattern in the given git repository"""
# This works because git does its own glob expansion even though there is no
# shell to do it.
output = subprocess.check_output([GIT, 'ls-files', '--', pattern],
cwd=repo_path)
return output.strip().splitlines()
def read_file_as_text(path):
with io.open(path, mode='r', encoding='utf-8') as f:
return f.read()
# Sanity checks to ensure that we can parse all grd and grdp files in the repo.
# Must not fail.
def Run():
grds = list_files_in_repository(repo_root, '*.grd')
grdps = list_files_in_repository(repo_root, '*.grdp')
print('Found %d grds, %d grdps in the repo.' % (len(grds), len(grdps)))
# Make sure we can parse all .grd files in the source tree. Grd files are
# parsed via the file path.
for grd in grds:
# This file is intentionally missing an include, skip it.
if grd == os.path.join('tools', 'translation', 'testdata', 'internal.grd'):
continue
path = os.path.join(repo_root, grd)
grd_helper.GetGrdMessages(path, os.path.dirname(path))
# Make sure we can parse all .grdp files in the source tree.
# Grdp files are parsed using file contents instead of path.
for grdp in grdps:
path = os.path.join(repo_root, grdp)
# Parse grdp files using file contents.
contents = read_file_as_text(path)
grd_helper.GetGrdpMessagesFromString(contents)
print('Successfully parsed all .grd and .grdp files in the repo.')
# Additional check for translateable grds. Translateable grds are a subset
# of all grds so this checks some files twice, but it exercises the
# get_translatable_grds() path and also doesn't need to skip internal.grd.
TRANSLATION_EXPECTATIONS_PATH = os.path.join(repo_root, 'tools',
'gritsettings',
'translation_expectations.pyl')
translateable_grds = translation_helper.get_translatable_grds(
repo_root, grds, TRANSLATION_EXPECTATIONS_PATH)
print('Found %d translateable .grd files in translation expectations.' %
len(translateable_grds))
for grd in translateable_grds:
path = os.path.join(repo_root, grd.path)
grd_helper.GetGrdMessages(path, os.path.dirname(path))
print('Successfully parsed all translateable_grds .grd files in translation '
'expectations.')
print('DONE')
if __name__ == '__main__':
Run()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment