Commit ca1327a4 authored by Mike Frysinger, committed by Commit Bot

grit: xtb_reader: parse inputs in binary mode

When opening files in text mode for XTB parsing, Python 3
will use the active system locale.  If the locale is not UTF-8, we
run into errors as Python tries to convert to ASCII.  To avoid this,
open the XTB files in binary mode before passing to the XML layers.

Normally another option would be to use io.open with an explicit
encoding='utf-8' setting, and indeed, that works fine for Python 3.
Unfortunately Python 2's XML libs don't handle unicode strings well,
leading to similar ASCII encoding errors.  It seems to work fine
when opened in binary mode though, so let's go that route.

Due to Python 2's implementation of bytes, the xtb_reader.Parse API
change shouldn't be noticed.  The code didn't work well with Python
3, so there aren't any users there yet :).  Not that it seems like
a big deal as cs.chromium.org says all API users are internal.

Bug: 983071
Test: `./grit/test_suite_all.py` passes

Change-Id: Idbd282ce9b93c4fcb4bb2dc4cce1dba3a84f6c27
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2043315
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Mike Frysinger <vapier@chromium.org>
Cr-Commit-Position: refs/heads/master@{#739443}
parent 91e56282
...@@ -41,7 +41,7 @@ class FileNode(base.Node): ...@@ -41,7 +41,7 @@ class FileNode(base.Node):
defs = getattr(root, 'defines', {}) defs = getattr(root, 'defines', {})
target_platform = getattr(root, 'target_platform', '') target_platform = getattr(root, 'target_platform', '')
xtb_file = open(self.ToRealPath(self.GetInputPath())) xtb_file = open(self.ToRealPath(self.GetInputPath()), 'rb')
try: try:
lang = xtb_reader.Parse(xtb_file, lang = xtb_reader.Parse(xtb_file,
self.UberClique().GenerateXtbParserCallback( self.UberClique().GenerateXtbParserCallback(
......
...@@ -49,7 +49,7 @@ to being one message for the whole menu.''' ...@@ -49,7 +49,7 @@ to being one message for the whole menu.'''
msg.append(part[1]) msg.append(part[1])
if len(msg): if len(msg):
xtb[msg_id] = ''.join(msg) xtb[msg_id] = ''.join(msg)
with open(xtb_file) as f: with open(xtb_file, 'rb') as f:
xtb_reader.Parse(f, Callback) xtb_reader.Parse(f, Callback)
translations = [] # list of translations as per transl2tc.WriteTranslations translations = [] # list of translations as per transl2tc.WriteTranslations
......
...@@ -116,7 +116,7 @@ def Parse(xtb_file, callback_function, defs=None, debug=False, ...@@ -116,7 +116,7 @@ def Parse(xtb_file, callback_function, defs=None, debug=False,
(if is_placeholder is True). (if is_placeholder is True).
Args: Args:
xtb_file: open('fr.xtb') xtb_file: open('fr.xtb', 'rb')
callback_function: def Callback(msg_id, parts): pass callback_function: def Callback(msg_id, parts): pass
defs: None, or a dictionary of preprocessor definitions. defs: None, or a dictionary of preprocessor definitions.
debug: Default False. Set True for verbose debug output. debug: Default False. Set True for verbose debug output.
...@@ -131,7 +131,7 @@ def Parse(xtb_file, callback_function, defs=None, debug=False, ...@@ -131,7 +131,7 @@ def Parse(xtb_file, callback_function, defs=None, debug=False,
# TODO(joi) Remove this ugly hack by getting the TC gang to change the # TODO(joi) Remove this ugly hack by getting the TC gang to change the
# XTB files somehow? # XTB files somehow?
front_of_file = xtb_file.read(1024) front_of_file = xtb_file.read(1024)
xtb_file.seek(front_of_file.find('<translationbundle')) xtb_file.seek(front_of_file.find(b'<translationbundle'))
handler = XtbContentHandler(callback=callback_function, defs=defs, handler = XtbContentHandler(callback=callback_function, defs=defs,
debug=debug, target_platform=target_platform) debug=debug, target_platform=target_platform)
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
from __future__ import print_function from __future__ import print_function
import io
import os import os
import sys import sys
if __name__ == '__main__': if __name__ == '__main__':
...@@ -14,8 +15,6 @@ if __name__ == '__main__': ...@@ -14,8 +15,6 @@ if __name__ == '__main__':
import unittest import unittest
from six import StringIO
from grit import util from grit import util
from grit import xtb_reader from grit import xtb_reader
from grit.node import empty from grit.node import empty
...@@ -23,7 +22,7 @@ from grit.node import empty ...@@ -23,7 +22,7 @@ from grit.node import empty
class XtbReaderUnittest(unittest.TestCase): class XtbReaderUnittest(unittest.TestCase):
def testParsing(self): def testParsing(self):
xtb_file = StringIO('''<?xml version="1.0" encoding="UTF-8"?> xtb_file = io.BytesIO(b'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE translationbundle> <!DOCTYPE translationbundle>
<translationbundle lang="fr"> <translationbundle lang="fr">
<translation id="5282608565720904145">Bingo.</translation> <translation id="5282608565720904145">Bingo.</translation>
...@@ -59,12 +58,14 @@ and another after a blank line.</translation> ...@@ -59,12 +58,14 @@ and another after a blank line.</translation>
clique_hello_user = msgs.children[1].GetCliques()[0] clique_hello_user = msgs.children[1].GetCliques()[0]
msg_hello_user = clique_hello_user.GetMessage() msg_hello_user = clique_hello_user.GetMessage()
xtb_file = StringIO('''<?xml version="1.0" encoding="UTF-8"?> xtb_file = io.BytesIO(b'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE translationbundle> <!DOCTYPE translationbundle>
<translationbundle lang="is"> <translationbundle lang="is">
<translation id="%s">Meirihattar!</translation> <translation id="%s">Meirihattar!</translation>
<translation id="%s">Saelir <ph name="USERNAME"/></translation> <translation id="%s">Saelir <ph name="USERNAME"/></translation>
</translationbundle>''' % (msg_mega.GetId(), msg_hello_user.GetId())) </translationbundle>''' % (
msg_mega.GetId().encode('utf-8'),
msg_hello_user.GetId().encode('utf-8')))
xtb_reader.Parse(xtb_file, xtb_reader.Parse(xtb_file,
msgs.UberClique().GenerateXtbParserCallback('is')) msgs.UberClique().GenerateXtbParserCallback('is'))
...@@ -82,7 +83,7 @@ and another after a blank line.</translation> ...@@ -82,7 +83,7 @@ and another after a blank line.</translation>
clique = msgs.children[0].GetCliques()[0] clique = msgs.children[0].GetCliques()[0]
msg = clique.GetMessage() msg = clique.GetMessage()
xtb_file = StringIO('''<?xml version="1.0" encoding="UTF-8"?> xtb_file = io.BytesIO(b'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE translationbundle> <!DOCTYPE translationbundle>
<translationbundle lang="is"> <translationbundle lang="is">
<if expr="is_linux"> <if expr="is_linux">
...@@ -100,7 +101,8 @@ and another after a blank line.</translation> ...@@ -100,7 +101,8 @@ and another after a blank line.</translation>
def testParseLargeFile(self): def testParseLargeFile(self):
def Callback(id, structure): def Callback(id, structure):
pass pass
with open(util.PathFromRoot('grit/testdata/generated_resources_fr.xtb')) as xtb: path = util.PathFromRoot('grit/testdata/generated_resources_fr.xtb')
with open(path, 'rb') as xtb:
xtb_reader.Parse(xtb, Callback) xtb_reader.Parse(xtb, Callback)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment