Commit d8ebb34e, authored by jsbell and committed by Commit bot

Text Encoding: Convert fast/encoding tests to testharness.js

Convert a handful of js-test tests to testharness, with an eye towards
adding more and eventually upstreaming them.

R=foolip@chromium.org

Review-Url: https://codereview.chromium.org/2390083002
Cr-Commit-Position: refs/heads/master@{#422868}
parent d962357f
CONSOLE WARNING: line 4: Synchronous XMLHttpRequest on the main thread is deprecated because of its detrimental effects to the end user's experience. For more help, check https://xhr.spec.whatwg.org/.
Test encoding behavior for sequences with invalid trail bytes
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS decode('UTF-8', '%C3%22') is 'U+FFFD/U+0022'
PASS decode('UTF-8', '%E2%22') is 'U+FFFD/U+0022'
PASS decode('UTF-8', '%E2%A0%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('UTF-8', '%F0%90%80%22') is 'U+FFFD/U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-KR', '%C4%22') is 'U+FFFD/U+0022'
PASS decode('EUC-KR', '%C4%5C') is 'U+FFFD/U+005C'
PASS decode('EUC-KR', '%C4%7B') is 'U+FFFD/U+007B'
PASS decode('EUC-KR', '%C6%53') is 'U+FFFD/U+0053'
PASS decode('EUC-KR', '%C7%41') is 'U+FFFD/U+0041'
PASS decode('EUC-KR', '%C7%81') is 'U+FFFD'
PASS decode('EUC-KR', '%FE%A1') is 'U+FFFD'
PASS decode('EUC-JP', '%8F%A1%A1') is 'U+FFFD'
PASS decode('EUC-JP', '%8F%A1%81%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-JP', '%8F%A1%22') is 'U+FFFD/U+FFFD/U+0022'
PASS decode('EUC-JP', '%8E%8E%A1') is 'U+FFFD/U+FF61'
PASS decode('EUC-JP', '%8E%E0') is 'U+FFFD/U+FFFD'
PASS decode('Big5', '%A1%22') is 'U+FFFD/U+0022'
PASS decode('Big5', '%87%66') is 'U+FFFD/U+0066'
PASS decode('Big5', '%89%44') is 'U+FFFD/U+0044'
PASS decode('Big5', '%8A%63') is 'U+FFFD/U+0063'
PASS decode('Big5', '%8B%54') is 'U+FFFD/U+0054'
PASS decode('Big5', '%8D%41') is 'U+FFFD/U+0041'
PASS decode('Big5', '%9B%61') is 'U+FFFD/U+0061'
PASS decode('Big5', '%9F%4E') is 'U+FFFD/U+004E'
PASS decode('Big5', '%A0%54') is 'U+FFFD/U+0054'
PASS decode('Shift_JIS', '%82%23') is 'U+FFFD/U+0023'
PASS decode('Shift_JIS', '%82%5C') is 'U+FFFD/U+005C'
PASS decode('Shift_JIS', '%82%7A') is 'U+FFFD/U+007A'
PASS decode('Shift_JIS', '%84%61') is 'U+FFFD/U+0061'
PASS decode('Shift_JIS', '%85%7B') is 'U+FFFD/U+007B'
PASS decode('Shift_JIS', '%87%7B') is 'U+FFFD/U+007B'
PASS decode('Shift_JIS', '%98%7E') is 'U+FFFD/U+007E'
PASS decode('Shift_JIS', '%FC%5B') is 'U+FFFD/U+005B'
PASS decode('shift_jis', '%EB%9F') is 'U+FFFD'
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE html>
<script src="../../resources/js-test.js"></script>
<title>Character Decoding - Invalid Trail Bytes</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script>
<script>
description("Test encoding behavior for sequences with invalid trail bytes");
// UTF-8 codec emits replacement characters
testDecode('UTF-8', '%C3%22', 'U+FFFD/U+0022');
testDecode('UTF-8', '%E2%22', 'U+FFFD/U+0022');
......
CONSOLE WARNING: line 4: Synchronous XMLHttpRequest on the main thread is deprecated because of its detrimental effects to the end user's experience. For more help, check https://xhr.spec.whatwg.org/.
Test encoding behavior for truncated sequences
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS decode('utf-8', '%E2%88%9A') is 'U+221A'
PASS decode('utf-8', '%E2%88') is 'U+FFFD/U+FFFD'
PASS decode('utf-8', '%E2') is 'U+FFFD'
PASS decode('utf-16', '%69%D8%D6%DE') is 'U+D869/U+DED6'
PASS decode('utf-16', '%69%D8%D6') is 'U+D869'
PASS decode('utf-16', '%69%D8') is 'U+D869'
PASS decode('utf-16', '%69') is ''
PASS decode('utf-16be', '%D8%69%DE%D6') is 'U+D869/U+DED6'
PASS decode('utf-16be', '%D8%69%DE') is 'U+D869'
PASS decode('utf-16be', '%D8%69') is 'U+D869'
PASS decode('utf-16be', '%D8') is ''
PASS decode('gb2312', '%A3%A0') is 'U+3000'
PASS decode('gb2312', '%A3') is 'U+FFFD'
PASS decode('shift_jis', '%82%d0') is 'U+3072'
PASS decode('shift_jis', '%82') is 'U+FFFD'
PASS decode('windows-949', '%A2%E6') is 'U+20AC'
PASS decode('windows-949', '%A2') is 'U+FFFD'
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE html>
<script src="../../resources/js-test.js"></script>
<title>Character Decoding - Truncated Sequences</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script>
<script>
description("Test encoding behavior for truncated sequences");
// UTF-8 codec emits replacement characters
testDecode('utf-8', '%E2%88%9A', 'U+221A');
testDecode('utf-8', '%E2%88', 'U+FFFD/U+FFFD');
......
<html>
<head>
<script src="../../resources/js-test.js"></script>
<!DOCTYPE html>
<title>Character Decoding</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-decoding-utils.js"></script>
</head>
<body>
<script>
description("This tests decoding characters in various character sets.");
testDecode('UTF-8', '%E2%88%9A', 'U+221A');
// \xA3\xA0 in GBK should be mapped to U+3000 instead of U+E5E5.
......@@ -154,5 +151,3 @@ testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6');
});
</script>
</body>
</html>
This tests encoding characters in various character sets.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS successfullyParsed is true
TEST COMPLETE
PASS encode('UTF-8', 'U+00A0') is '%C2%A0'
PASS encode('UTF-8', '0xD800') is '%EF%BF%BD'
PASS encode('UTF-8', '0xDC00') is '%EF%BF%BD'
PASS encode('GBK', 'U+00A5') is '%A3%A4'
PASS encode('gb2312', 'U+00A5') is '%A3%A4'
PASS encode('GB_2312-80', 'U+00A5') is '%A3%A4'
PASS encode('EUC-CN', 'U+00A5') is '%A3%A4'
PASS encode('GBK', 'U+20AC') is '%80'
PASS encode('gb2312', 'U+20AC') is '%80'
PASS encode('GB_2312-80', 'U+20AC') is '%80'
PASS encode('EUC-CN', 'U+20AC') is '%80'
PASS encode('GBK', 'U+01F9') is '%A8%BF'
PASS encode('GBK', 'U+1E3F') is '%A8%BC'
PASS encode('gb18030', 'U+01F9') is '%A8%BF'
PASS encode('gb18030', 'U+1E3F') is '%A8%BC'
PASS encode('GBK', 'U+2026') is '%A1%AD'
PASS encode('GBK', 'U+FF5E') is '%A1%AB'
PASS encode('gb18030', 'U+2026') is '%A1%AD'
PASS encode('gb18030', 'U+FF5E') is '%A1%AB'
PASS encode('GBK', 'U+22EF') is '%26%238943%3B'
PASS encode('GBK', 'U+301C') is '%26%2312316%3B'
PASS encode('Big5', 'U+2550') is '%F9%F9'
PASS encode('Big5', 'U+255E') is '%F9%E9'
PASS encode('Big5', 'U+2561') is '%F9%EB'
PASS encode('Big5', 'U+256A') is '%F9%EA'
PASS encode('Big5', 'U+5341') is '%A4Q'
PASS encode('Big5', 'U+5345') is '%A4%CA'
PASS encode('KOI8-U', 'U+045E') is '%AE'
PASS encode('KOI8-U', 'U+040E') is '%BE'
PASS encode('KOI8-RU', 'U+045E') is '%AE'
PASS encode('KOI8-RU', 'U+040E') is '%BE'
PASS encode('csiso2022kr', 'U+00A0') is '%C2%A0'
PASS encode('hz-gb-2312', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-cn', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-cn-ext', 'U+00A0') is '%C2%A0'
PASS encode('iso-2022-kr', 'U+00A0') is '%C2%A0'
<html>
<head>
<script src="../../resources/js-test.js"></script>
<!DOCTYPE html>
<title>Character Encoding</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script src="resources/char-encoding-utils.js"></script>
</head>
<body>
<form id="form" method="GET" target="subframe"><input type="text" id="text" name="text"></form>
<iframe id="subframe" name="subframe"></iframe>
<script>
var charsets = new Array;
var unicodes = new Array;
var expectedResults = new Array;
var results = new Object;
var i = 0;
testEncode("UTF-8", "U+00A0", "%C2%A0");
testEncode('UTF-8', 'U+221A', '%E2%88%9A');
// Unpaired UTF-16 surrogates
testEncode("UTF-8", "0xD800", "%EF%BF%BD"); // U+FFFD (REPLACEMENT CHARACTER)
......@@ -69,13 +58,5 @@ testEncode("iso-2022-cn", "U+00A0", "%C2%A0");
testEncode("iso-2022-cn-ext", "U+00A0", "%C2%A0");
testEncode("iso-2022-kr", "U+00A0", "%C2%A0");
// Turning on this test causes a download to occur. FIXME: A bug?
// testEncode('UTF-8', 'U+221A', '%E2%88%9A');
if (window.testRunner)
testRunner.waitUntilDone();
runTest();
</script>
</body>
</html>
/**
 * Loads a byte sequence through a `data:` URL and lets the browser decode it
 * using the requested charset.
 *
 * @param {string} charsetName - Charset label forced onto the response via
 *     `overrideMimeType()`.
 * @param {string} characterSequence - Payload appended verbatim to the
 *     `data:text/plain,` URL (callers pass percent-encoded bytes such as
 *     '%E2%88%9A').
 * @returns {Promise<string>} Resolves with the decoded response text; rejects
 *     with an Error if the request reports an error.
 */
function decodeText(charsetName, characterSequence) {
  return new Promise((resolve, reject) => {
    const req = new XMLHttpRequest();
    req.open('GET', `data:text/plain,${characterSequence}`);
    req.overrideMimeType(`text/plain; charset="${charsetName}"`);
    // Register the completion handlers before sending; nothing may return
    // from this executor early, or the promise would never settle.
    req.onload = () => resolve(req.responseText);
    req.onerror = () => {
      reject(new Error(req.statusText ||
                       `Request failed while decoding as ${charsetName}`));
    };
    req.send('');
  });
}
/**
 * Decodes a byte sequence and describes the result as slash-separated
 * "U+XXXX" tokens, one per UTF-16 code unit.
 *
 * @param {string} charsetName - Charset label passed through to decodeText().
 * @param {string} characterSequence - Encoded input passed through to
 *     decodeText().
 * @returns {Promise<string>} Resolves with e.g. 'U+FFFD/U+0022', or '' when
 *     the decoded text is empty.
 */
function decode(charsetName, characterSequence) {
  return decodeText(charsetName, characterSequence).then((decodedText) => {
    // Walk code units rather than code points so that each half of a
    // surrogate pair is reported on its own (e.g. 'U+D869/U+DED6').
    const tokens = [];
    for (let index = 0; index < decodedText.length; ++index) {
      const hex = decodedText.charCodeAt(index).toString(16).toUpperCase();
      tokens.push(`U+${('0000' + hex).slice(-4)}`);
    }
    return tokens.join('/');
  });
}
/**
 * Registers one testharness.js promise test asserting that decoding
 * `characterSequence` as `charsetName` yields `unicode`.
 *
 * @param {string} charsetName - Charset label to decode with.
 * @param {string} characterSequence - Encoded input handed to decode().
 * @param {string} unicode - Expected "U+XXXX/..." description of the result.
 */
function testDecode(charsetName, characterSequence, unicode) {
  promise_test(() => {
    return decode(charsetName, characterSequence).then((result) => {
      assert_equals(result, unicode);
    });
  }, `Decode ${charsetName}: ${characterSequence} => ${unicode}`);
}
/**
 * Registers a decode test for every (encoding, sample) combination.
 *
 * @param {{encodings: Object, encoded: Object, unicode: Object}} inputData
 *     `encodings` lists charset labels; `encoded` and `unicode` are parallel
 *     collections looked up with the same key (encoded input and the
 *     expected result for it).
 */
function batchTestDecode(inputData) {
  // Object.keys() restricts iteration to own keys, so enumerable properties
  // inherited from a prototype are never mistaken for test data.
  const sampleKeys = Object.keys(inputData.encoded);
  Object.keys(inputData.encodings).forEach((encodingKey) => {
    sampleKeys.forEach((sampleKey) => {
      testDecode(inputData.encodings[encodingKey],
                 inputData.encoded[sampleKey],
                 inputData.unicode[sampleKey]);
    });
  });
}
/**
 * Looks up a previously recorded encoding result.
 *
 * The encoding itself cannot be performed synchronously, so this only reads
 * the value stored in the shared `results` table under [charset][unicode].
 */
function encode(charset, unicode) {
  const resultsForCharset = results[charset];
  return resultsForCharset[unicode];
}
/**
 * Final step of the form-based encoding run: removes the helper form and
 * iframe from the document, prints the description, checks every recorded
 * result with shouldBe(), and notifies testRunner when one is present.
 *
 * Uses the shared global counter `i` as its loop variable, leaving it equal
 * to `charsets.length` afterwards.
 */
function testsDone() {
  const formElement = document.getElementById('form');
  const frameElement = document.getElementById('subframe');
  formElement.parentNode.removeChild(formElement);
  frameElement.parentNode.removeChild(frameElement);

  description("This tests encoding characters in various character sets.");

  i = 0;
  while (i < charsets.length) {
    const actualExpression = `encode('${charsets[i]}', '${unicodes[i]}')`;
    const expectedExpression = `'${expectedResults[i]}'`;
    shouldBe(actualExpression, expectedExpression);
    ++i;
  }

  if (window.testRunner) {
    testRunner.notifyDone();
  }
}
/**
 * Stores `result` for the test case currently selected by the global index
 * `i`, creating the per-charset entry in `results` on first use.
 */
function processResult(result) {
  const charsetName = charsets[i];
  if (!results[charsetName]) {
    results[charsetName] = {};
  }
  results[charsetName][unicodes[i]] = result;
}
/**
 * Load handler for the result iframe: records everything after the first
 * '=' of the iframe's location as the result for the current test case,
 * advances the global index `i`, and continues with runTest().
 */
function subframeLoaded() {
  const frame = document.getElementById('subframe');
  const address = `${frame.contentWindow.location}`;
  const valueStart = address.indexOf('=') + 1;
  processResult(address.slice(valueStart));
  ++i;
  runTest();
}
/**
 * Runs the test case selected by the global index `i`, or finishes the run
 * via testsDone() once every case has been processed.
 *
 * For a pending case the shared form is pointed at resources/dummy.html with
 * the case's charset, the text field receives the character under test, and
 * the form is submitted into the iframe whose load handler is
 * subframeLoaded().
 */
function runTest() {
  if (i >= charsets.length) {
    testsDone();
    return;
  }

  const form = document.getElementById('form');
  const text = document.getElementById('text');
  const subframe = document.getElementById('subframe');

  form.acceptCharset = charsets[i];
  form.action = "resources/dummy.html";
  subframe.onload = subframeLoaded;
  // 'U+XXXX' is rewritten to '0xXXXX' so it converts to the numeric code unit.
  text.value = String.fromCharCode(unicodes[i].replace('U+', '0x'));
  // Without the submission the iframe never loads and the run would stall.
  form.submit();
}
// Counter used to give every helper iframe a distinct name.
let uniqueId = 0;

/**
 * Encodes a single character by submitting it through a GET form whose
 * accept-charset is `charsetName` and reading the resulting query string
 * back from the target iframe's URL.
 *
 * @param {string} charsetName - Charset assigned to the form's acceptCharset.
 * @param {string} unicode - Code point written as 'U+XXXX' or '0xXXXX'.
 * @returns {Promise<string>} Resolves with everything after the first '=' in
 *     the loaded iframe URL. Rejects if `unicode` is not a valid code point
 *     or if reading the iframe location throws.
 */
function encodeText(charsetName, unicode) {
  return new Promise((resolve, reject) => {
    // Convert up front, before any DOM is created: fromCodePoint() throws a
    // RangeError for unparsable input (rejecting this promise) and, unlike
    // fromCharCode(), does not truncate code points above U+FFFF.
    const character = String.fromCodePoint(Number(unicode.replace('U+', '0x')));

    const frameName = `subframe${++uniqueId}`;

    const iframe = document.createElement('iframe');
    iframe.style.display = 'none';
    // |iframe.name| must be assigned before adding frame to the body or
    // |form.target| will not find it.
    iframe.name = frameName;
    document.body.appendChild(iframe);

    const form = document.body.appendChild(document.createElement('form'));
    form.style.display = 'none';
    form.method = 'GET';
    form.action = 'resources/dummy.html';
    form.acceptCharset = charsetName;
    form.target = frameName;

    const input = form.appendChild(document.createElement('input'));
    input.type = 'text';
    input.name = 'text';
    input.value = character;

    iframe.onload = () => {
      try {
        const url = iframe.contentWindow.location.href;
        resolve(url.slice(url.indexOf('=') + 1));
      } catch (error) {
        // Fail the test immediately instead of leaving the promise pending.
        reject(error);
      } finally {
        iframe.remove();
        form.remove();
      }
    };

    form.submit();
  });
}
/**
 * Registers one testharness.js promise test asserting that encoding
 * `unicode` with `charsetName` produces `characterSequence`.
 *
 * @param {string} charsetName - Charset to encode with.
 * @param {string} unicode - Code point handed to encodeText().
 * @param {string} characterSequence - Expected encoded result.
 */
function testEncode(charsetName, unicode, characterSequence) {
  promise_test(() => {
    return encodeText(charsetName, unicode).then((result) => {
      assert_equals(result, characterSequence);
    });
  }, `Encode ${charsetName}: ${unicode} -> ${characterSequence}`);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment