Commit d8ebb34e authored by jsbell, committed by Commit bot

Text Encoding: Convert fast/encoding tests to testharness.js

Convert a handful of js-test tests to testharness, with an eye towards
adding more and eventually upstreaming them.

R=foolip@chromium.org

Review-Url: https://codereview.chromium.org/2390083002
Cr-Commit-Position: refs/heads/master@{#422868}
parent d962357f
CONSOLE WARNING: line 4: Synchronous XMLHttpRequest on the main thread is deprecated because of its detrimental effects to the end user's experience. For more help, check https://xhr.spec.whatwg.org/.
Test encoding behavior for sequences with invalid trail bytes
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS decode('UTF-8', '%C3%22') is 'U+FFFD/U+0022'
PASS decode('UTF-8', '%E2%22') is 'U+FFFD/U+0022'
PASS decode('UTF-8', '%E2%A0%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('UTF-8', '%F0%90%80%22') is 'U+FFFD/U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-KR', '%C4%22') is 'U+FFFD/U+0022'
PASS decode('EUC-KR', '%C4%5C') is 'U+FFFD/U+005C'
PASS decode('EUC-KR', '%C4%7B') is 'U+FFFD/U+007B'
PASS decode('EUC-KR', '%C6%53') is 'U+FFFD/U+0053'
PASS decode('EUC-KR', '%C7%41') is 'U+FFFD/U+0041'
PASS decode('EUC-KR', '%C7%81') is 'U+FFFD'
PASS decode('EUC-KR', '%FE%A1') is 'U+FFFD'
PASS decode('EUC-JP', '%8F%A1%A1') is 'U+FFFD'
PASS decode('EUC-JP', '%8F%A1%81%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-JP', '%8F%A1%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-JP', '%8E%8E%A1') is 'U+FFFD/U+FF61'
PASS decode('EUC-JP', '%8E%E0') is 'U+FFFD/U+FFFD'
PASS decode('Big5', '%A1%22') is 'U+FFFD/U+0022'
PASS decode('Big5', '%87%66') is 'U+FFFD/U+0066'
PASS decode('Big5', '%89%44') is 'U+FFFD/U+0044'
PASS decode('Big5', '%8A%63') is 'U+FFFD/U+0063'
PASS decode('Big5', '%8B%54') is 'U+FFFD/U+0054'
PASS decode('Big5', '%8D%41') is 'U+FFFD/U+0041'
PASS decode('Big5', '%9B%61') is 'U+FFFD/U+0061'
PASS decode('Big5', '%9F%4E') is 'U+FFFD/U+004E'
PASS decode('Big5', '%A0%54') is 'U+FFFD/U+0054'
PASS decode('Shift_JIS', '%82%23') is 'U+FFFD/U+0023'
PASS decode('Shift_JIS', '%82%5C') is 'U+FFFD/U+005C'
PASS decode('Shift_JIS', '%82%7A') is 'U+FFFD/U+007A'
PASS decode('Shift_JIS', '%84%61') is 'U+FFFD/U+0061'
PASS decode('Shift_JIS', '%85%7B') is 'U+FFFD/U+007B'
PASS decode('Shift_JIS', '%87%7B') is 'U+FFFD/U+007B'
PASS decode('Shift_JIS', '%98%7E') is 'U+FFFD/U+007E'
PASS decode('Shift_JIS', '%FC%5B') is 'U+FFFD/U+005B'
PASS decode('shift_jis', '%EB%9F') is 'U+FFFD'
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE html> <!DOCTYPE html>
<script src="../../resources/js-test.js"></script> <title>Character Decoding - Invalid Trail Bytes</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script> <script src="resources/char-decoding-utils.js"></script>
<script> <script>
description("Test encoding behavior for sequences with invalid trail bytes");
// UTF-8 codec emits replacement characters // UTF-8 codec emits replacement characters
testDecode('UTF-8', '%C3%22', 'U+FFFD/U+0022'); testDecode('UTF-8', '%C3%22', 'U+FFFD/U+0022');
testDecode('UTF-8', '%E2%22', 'U+FFFD/U+0022'); testDecode('UTF-8', '%E2%22', 'U+FFFD/U+0022');
......
CONSOLE WARNING: line 4: Synchronous XMLHttpRequest on the main thread is deprecated because of its detrimental effects to the end user's experience. For more help, check https://xhr.spec.whatwg.org/.
Test encoding behavior for truncated sequences
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS decode('utf-8', '%E2%88%9A') is 'U+221A'
PASS decode('utf-8', '%E2%88') is 'U+FFFD/U+FFFD'
PASS decode('utf-8', '%E2') is 'U+FFFD'
PASS decode('utf-16', '%69%D8%D6%DE') is 'U+D869/U+DED6'
PASS decode('utf-16', '%69%D8%D6') is 'U+D869'
PASS decode('utf-16', '%69%D8') is 'U+D869'
PASS decode('utf-16', '%69') is ''
PASS decode('utf-16be', '%D8%69%DE%D6') is 'U+D869/U+DED6'
PASS decode('utf-16be', '%D8%69%DE') is 'U+D869'
PASS decode('utf-16be', '%D8%69') is 'U+D869'
PASS decode('utf-16be', '%D8') is ''
PASS decode('gb2312', '%A3%A0') is 'U+3000'
PASS decode('gb2312', '%A3') is 'U+FFFD'
PASS decode('shift_jis', '%82%d0') is 'U+3072'
PASS decode('shift_jis', '%82') is 'U+FFFD'
PASS decode('windows-949', '%A2%E6') is 'U+20AC'
PASS decode('windows-949', '%A2') is 'U+FFFD'
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE html> <!DOCTYPE html>
<script src="../../resources/js-test.js"></script> <title>Character Decoding - Truncated Sequences</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script> <script src="resources/char-decoding-utils.js"></script>
<script> <script>
description("Test encoding behavior for truncated sequences");
// UTF-8 codec emits replacement characters // UTF-8 codec emits replacement characters
testDecode('utf-8', '%E2%88%9A', 'U+221A'); testDecode('utf-8', '%E2%88%9A', 'U+221A');
testDecode('utf-8', '%E2%88', 'U+FFFD/U+FFFD'); testDecode('utf-8', '%E2%88', 'U+FFFD/U+FFFD');
......
<html> <!DOCTYPE html>
<head> <title>Character Decoding</title>
<script src="../../resources/js-test.js"></script> <script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script> <script src="resources/char-decoding-utils.js"></script>
</head>
<body>
<script> <script>
description("This tests decoding characters in various character sets.");
testDecode('UTF-8', '%E2%88%9A', 'U+221A'); testDecode('UTF-8', '%E2%88%9A', 'U+221A');
// \xA3\xA0 in GBK should be mapped to U+3000 instead of U+E5E5. // \xA3\xA0 in GBK should be mapped to U+3000 instead of U+E5E5.
...@@ -154,5 +151,3 @@ testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6'); ...@@ -154,5 +151,3 @@ testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6');
}); });
</script> </script>
</body>
</html>
This tests encoding characters in various character sets.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS successfullyParsed is true
TEST COMPLETE
PASS encode('UTF-8', 'U+00A0') is '%C2%A0'
PASS encode('UTF-8', '0xD800') is '%EF%BF%BD'
PASS encode('UTF-8', '0xDC00') is '%EF%BF%BD'
PASS encode('GBK', 'U+00A5') is '%A3%A4'
PASS encode('gb2312', 'U+00A5') is '%A3%A4'
PASS encode('GB_2312-80', 'U+00A5') is '%A3%A4'
PASS encode('EUC-CN', 'U+00A5') is '%A3%A4'
PASS encode('GBK', 'U+20AC') is '%80'
PASS encode('gb2312', 'U+20AC') is '%80'
PASS encode('GB_2312-80', 'U+20AC') is '%80'
PASS encode('EUC-CN', 'U+20AC') is '%80'
PASS encode('GBK', 'U+01F9') is '%A8%BF'
PASS encode('GBK', 'U+1E3F') is '%A8%BC'
PASS encode('gb18030', 'U+01F9') is '%A8%BF'
PASS encode('gb18030', 'U+1E3F') is '%A8%BC'
PASS encode('GBK', 'U+2026') is '%A1%AD'
PASS encode('GBK', 'U+FF5E') is '%A1%AB'
PASS encode('gb18030', 'U+2026') is '%A1%AD'
PASS encode('gb18030', 'U+FF5E') is '%A1%AB'
PASS encode('GBK', 'U+22EF') is '%26%238943%3B'
PASS encode('GBK', 'U+301C') is '%26%2312316%3B'
PASS encode('Big5', 'U+2550') is '%F9%F9'
PASS encode('Big5', 'U+255E') is '%F9%E9'
PASS encode('Big5', 'U+2561') is '%F9%EB'
PASS encode('Big5', 'U+256A') is '%F9%EA'
PASS encode('Big5', 'U+5341') is '%A4Q'
PASS encode('Big5', 'U+5345') is '%A4%CA'
PASS encode('KOI8-U', 'U+045E') is '%AE'
PASS encode('KOI8-U', 'U+040E') is '%BE'
PASS encode('KOI8-RU', 'U+045E') is '%AE'
PASS encode('KOI8-RU', 'U+040E') is '%BE'
PASS encode('csiso2022kr', 'U+00A0') is '%C2%A0'
PASS encode('hz-gb-2312', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-cn', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-cn-ext', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-kr', 'U+00A0') is '%C2%A0'
<html> <!DOCTYPE html>
<head> <title>Character Encoding</title>
<script src="../../resources/js-test.js"></script> <script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-encoding-utils.js"></script> <script src="resources/char-encoding-utils.js"></script>
</head>
<body> <body>
<form id="form" method="GET" target="subframe"><input type="text" id="text" name="text"></form>
<iframe id="subframe" name="subframe"></iframe>
<script> <script>
var charsets = new Array;
var unicodes = new Array;
var expectedResults = new Array;
var results = new Object;
var i = 0;
testEncode("UTF-8", "U+00A0", "%C2%A0"); testEncode("UTF-8", "U+00A0", "%C2%A0");
testEncode('UTF-8', 'U+221A', '%E2%88%9A');
// Unpaired UTF-16 surrogates // Unpaired UTF-16 surrogates
testEncode("UTF-8", "0xD800", "%EF%BF%BD"); // U+FFFD (REPLACEMENT CHARACTER) testEncode("UTF-8", "0xD800", "%EF%BF%BD"); // U+FFFD (REPLACEMENT CHARACTER)
...@@ -69,13 +58,5 @@ testEncode("iso-2022-cn", "U+00A0", "%C2%A0"); ...@@ -69,13 +58,5 @@ testEncode("iso-2022-cn", "U+00A0", "%C2%A0");
testEncode("iso-2022-cn-ext", "U+00A0", "%C2%A0"); testEncode("iso-2022-cn-ext", "U+00A0", "%C2%A0");
testEncode("iso-2022-kr", "U+00A0", "%C2%A0"); testEncode("iso-2022-kr", "U+00A0", "%C2%A0");
// Turning on this test causes a download to occur. FIXME: A bug?
// testEncode('UTF-8', 'U+221A', '%E2%88%9A');
if (window.testRunner)
testRunner.waitUntilDone();
runTest();
</script> </script>
</body> </body>
</html>
function decodeText(charsetName, characterSequence) function decodeText(charsetName, characterSequence) {
{ return new Promise((resolve, reject) => {
var req = new XMLHttpRequest; const req = new XMLHttpRequest;
req.open('GET', 'data:text/plain,' + characterSequence, false); req.open('GET', `data:text/plain,${characterSequence}`);
req.overrideMimeType('text/plain; charset="' + charsetName + '"'); req.overrideMimeType(`text/plain; charset="${charsetName}"`);
req.send(''); req.send('');
return req.responseText; req.onload = () => resolve(req.responseText);
req.onerror = () => reject(new Error(req.statusText));
});
} }
function decode(charsetName, characterSequence) function decode(charsetName, characterSequence) {
{ return decodeText(charsetName, characterSequence).then(decodedText => {
var decodedText = decodeText(charsetName, characterSequence); return decodedText.split('')
var result = ""; .map(char => char.charCodeAt(0))
for (var i = 0; i < decodedText.length; ++i) { .map(code => 'U+' + ('0000' + code.toString(16).toUpperCase()).slice(-4))
var code = decodedText.charCodeAt(i).toString(16).toUpperCase(); .join('/');
if (i) });
result += "/";
result += "U+" + ("0000" + code).slice(-4);
}
return result;
} }
function testDecode(charsetName, characterSequence, unicode) function testDecode(charsetName, characterSequence, unicode) {
{ promise_test(t => {
shouldBe("decode('" + charsetName + "', '" + characterSequence + "')", "'" + unicode + "'"); return decode(charsetName, characterSequence).then(result => {
assert_equals(result, unicode);
});
}, `Decode ${charsetName}: ${characterSequence} => ${unicode}`);
} }
function batchTestDecode(inputData) function batchTestDecode(inputData) {
{ for (let i in inputData.encodings) {
for (var i in inputData.encodings) { for (let j in inputData.encoded) {
for (var j in inputData.encoded) testDecode(inputData.encodings[i],
testDecode(inputData.encodings[i], inputData.encoded[j], inputData.unicode[j]); inputData.encoded[j],
inputData.unicode[j]);
} }
}
} }
function encode(charset, unicode) let uniqueId = 0;
{ function encodeText(charsetName, unicode) {
// Returns a value already encoded, since we can't do it synchronously. return new Promise((resolve, reject) => {
return results[charset][unicode]; const frame_id = `subframe${++uniqueId}`;
}
const iframe = document.createElement('iframe');
function testsDone() iframe.style.display = 'none';
{ // |iframe.name| must be assigned before adding frame to the body or
var form = document.getElementById('form'); // |form.target| will not find it.
var subframe = document.getElementById('subframe'); iframe.name = frame_id;
document.body.appendChild(iframe);
form.parentNode.removeChild(form);
subframe.parentNode.removeChild(subframe); const form = document.body.appendChild(document.createElement('form'));
form.style.display = 'none';
description("This tests encoding characters in various character sets."); form.method = 'GET';
form.action = 'resources/dummy.html';
for (i = 0; i < charsets.length; ++i) { form.acceptCharset = charsetName;
shouldBe("encode('" + charsets[i] + "', '" + unicodes[i] + "')", "'" + expectedResults[i] + "'"); form.target = frame_id;
}
const input = form.appendChild(document.createElement('input'));
if (window.testRunner) input.type = 'text';
testRunner.notifyDone(); input.name = 'text';
} input.value = String.fromCharCode(unicode.replace('U+', '0x'));
function processResult(result) iframe.onload = () => {
{ const url = iframe.contentWindow.location.href;
var charsetResults = results[charsets[i]]; const result = url.substr(url.indexOf('=') + 1);
if (!charsetResults) {
charsetResults = new Object; iframe.remove();
results[charsets[i]] = charsetResults; form.remove();
}
charsetResults[unicodes[i]] = result; resolve(result);
} };
function subframeLoaded()
{
var URL = "" + document.getElementById('subframe').contentWindow.location;
processResult(URL.substr(URL.indexOf('=') + 1));
++i;
runTest();
}
function runTest()
{
if (i >= charsets.length) {
testsDone();
return;
}
var form = document.getElementById('form');
var text = document.getElementById('text');
var subframe = document.getElementById('subframe');
form.acceptCharset = charsets[i];
form.action = "resources/dummy.html";
subframe.onload = subframeLoaded;
text.value = String.fromCharCode(unicodes[i].replace('U+', '0x'));
form.submit(); form.submit();
});
} }
function testEncode(charsetName, unicode, characterSequence) function testEncode(charsetName, unicode, characterSequence) {
{ promise_test(t => {
charsets.push(charsetName); return encodeText(charsetName, unicode).then(result => {
unicodes.push(unicode); assert_equals(result, characterSequence);
expectedResults.push(characterSequence); });
}, `Encode ${charsetName}: ${unicode} -> ${characterSequence}`);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment