Commit 97294238 authored by jsbell@chromium.org's avatar jsbell@chromium.org

Convert Encoding API tests to W3C testharness.js

These tests should be upstreamed to W3C web-platform-tests.

BUG=368310,368904

Review URL: https://codereview.chromium.org/240283013

git-svn-id: svn://svn.chromium.org/blink/trunk@173047 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent c5ed8609
Supersets of ASCII decode ASCII correctly
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
decoder = new TextDecoder("ibm866")
FAIL decoder = new TextDecoder("ibm866") threw exception TypeError: Failed to construct 'TextDecoder': The encoding label provided ('ibm866') is invalid.
decoder = new TextDecoder("iso-8859-2")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-3")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-4")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-5")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-6")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-7")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-8")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-8-i")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-10")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-13")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-14")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-15")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-8859-16")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("koi8-r")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("koi8-u")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("macintosh")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-874")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1250")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1251")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1252")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1253")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1254")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1255")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1256")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1257")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("windows-1258")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("x-mac-cyrillic")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("gbk")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("gb18030")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("hz-gb-2312")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("big5")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("euc-jp")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-2022-jp")
decoded = decoder.decode(new Uint8Array(bytes))
FAIL encodeURIComponent(string) should be %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%EF%BF%BD%EF%BF%BD%10%11%12%13%14%15%16%17%18%19%1A%1C%1D%1E%1F%20!%22%23%24%25%26'()*%2B%2C-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F. Was %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1C%1D%1E%1F%20!%22%23%24%25%26'()*%2B%2C-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F.
decoder = new TextDecoder("shift_jis")
decoded = decoder.decode(new Uint8Array(bytes))
FAIL encodeURIComponent(string) should be %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1C%1B%7F%1D%1E%1F%20!%22%23%24%25%26'()*%2B%2C-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%1A. Was %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23%24%25%26'()*%2B%2C-.%2F0123456789%3A%3B%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F.
decoder = new TextDecoder("euc-kr")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("iso-2022-kr")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
decoder = new TextDecoder("x-user-defined")
decoded = decoder.decode(new Uint8Array(bytes))
PASS encodeURIComponent(string) is encodeURIComponent(decoded)
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS ASCII superset encoding: iso-8859-2
PASS ASCII superset encoding: iso-8859-3
PASS ASCII superset encoding: iso-8859-4
PASS ASCII superset encoding: iso-8859-5
PASS ASCII superset encoding: iso-8859-6
PASS ASCII superset encoding: iso-8859-7
PASS ASCII superset encoding: iso-8859-8
PASS ASCII superset encoding: iso-8859-8-i
PASS ASCII superset encoding: iso-8859-10
PASS ASCII superset encoding: iso-8859-13
PASS ASCII superset encoding: iso-8859-14
PASS ASCII superset encoding: iso-8859-15
PASS ASCII superset encoding: iso-8859-16
PASS ASCII superset encoding: koi8-r
PASS ASCII superset encoding: koi8-u
PASS ASCII superset encoding: macintosh
PASS ASCII superset encoding: windows-874
PASS ASCII superset encoding: windows-1250
PASS ASCII superset encoding: windows-1251
PASS ASCII superset encoding: windows-1252
PASS ASCII superset encoding: windows-1253
PASS ASCII superset encoding: windows-1254
PASS ASCII superset encoding: windows-1255
PASS ASCII superset encoding: windows-1256
PASS ASCII superset encoding: windows-1257
PASS ASCII superset encoding: windows-1258
PASS ASCII superset encoding: x-mac-cyrillic
PASS ASCII superset encoding: gbk
PASS ASCII superset encoding: gb18030
PASS ASCII superset encoding: hz-gb-2312
PASS ASCII superset encoding: big5
PASS ASCII superset encoding: euc-jp
FAIL ASCII superset encoding: iso-2022-jp assert_equals: expected "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" but got "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r��\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
FAIL ASCII superset encoding: shift_jis assert_equals: expected "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" but got "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1c\x1b\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x1a"
PASS ASCII superset encoding: euc-kr
PASS ASCII superset encoding: iso-2022-kr
PASS ASCII superset encoding: x-user-defined
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: ASCII supersets</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
// For every non-UTF encoding listed in encodings_table, decode the bytes
// 0x00-0x7F and check that the result equals the same code points.
// NOTE(review): this script no longer calls any js-test.js helper
// (description/evalAndLog/shouldBe); the js-test.js include above looks
// removable - confirm before dropping it.

// Encodings that have escape codes in 0x00-0x7F
var escape_codes = {
    'hz-gb-2312': [ 0x7E ],
    'iso-2022-jp': [ 0x1B ],
    'iso-2022-kr': [ 0x0E, 0x0F, 0x1B ]
};

encodings_table.forEach(function(section) {
    section.encodings.forEach(function(encoding) {
        // The "replacement" encoding and the UTF encodings are not covered
        // by this test.
        if (encoding.name === 'replacement')
            return;
        if (utf_encodings.indexOf(encoding.name) !== -1)
            return;

        test(function() {
            // Build the expected string and the input bytes side by side,
            // leaving out any byte this encoding treats as an escape code.
            var string = '';
            var bytes = [];
            for (var i = 0; i < 128; ++i) {
                if (encoding.name in escape_codes && escape_codes[encoding.name].indexOf(i) !== -1)
                    continue;
                string += String.fromCharCode(i);
                bytes.push(i);
            }

            var decoder = new TextDecoder(encoding.name);
            var decoded = decoder.decode(new Uint8Array(bytes));
            assert_equals(decoded, string);
        }, 'ASCII superset encoding: ' + encoding.name);
    });
});
</script>
This tests the basics of the Encoding API.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS 'TextEncoder' in window is true
PASS 'TextDecoder' in window is true
PASS 'encoding' in new TextEncoder is true
PASS 'encoding' in new TextDecoder is true
PASS typeof (new TextEncoder).encoding is "string"
PASS typeof (new TextDecoder).encoding is "string"
PASS 'encode' in new TextEncoder is true
PASS 'decode' in new TextDecoder is true
PASS typeof (new TextEncoder).encode is "function"
PASS typeof (new TextDecoder).decode is "function"
PASS (new TextEncoder).encoding is "utf-8"
PASS (new TextDecoder).encoding is "utf-8"
test encode/decode sample - utf-8
encoded = new TextEncoder('utf-8').encode("z¢水𝄞􏿽")
PASS JSON.stringify(toArray(encoded)) is "[122,194,162,230,176,180,240,157,132,158,244,143,191,189]"
PASS new TextDecoder('utf-8').decode(new Uint8Array([122,194,162,230,176,180,240,157,132,158,244,143,191,189])) is "z¢水𝄞􏿽"
test encode/decode sample - utf-16le
encoded = new TextEncoder('utf-16le').encode("z¢水𝄞􏿽")
PASS JSON.stringify(toArray(encoded)) is "[122,0,162,0,52,108,52,216,30,221,255,219,253,223]"
PASS new TextDecoder('utf-16le').decode(new Uint8Array([122,0,162,0,52,108,52,216,30,221,255,219,253,223])) is "z¢水𝄞􏿽"
test encode/decode sample - utf-16be
encoded = new TextEncoder('utf-16be').encode("z¢水𝄞􏿽")
PASS JSON.stringify(toArray(encoded)) is "[0,122,0,162,108,52,216,52,221,30,219,255,223,253]"
PASS new TextDecoder('utf-16be').decode(new Uint8Array([0,122,0,162,108,52,216,52,221,30,219,255,223,253])) is "z¢水𝄞􏿽"
test encode/decode sample - utf-16
encoded = new TextEncoder('utf-16').encode("z¢水𝄞􏿽")
PASS JSON.stringify(toArray(encoded)) is "[122,0,162,0,52,108,52,216,30,221,255,219,253,223]"
PASS new TextDecoder('utf-16').decode(new Uint8Array([122,0,162,0,52,108,52,216,30,221,255,219,253,223])) is "z¢水𝄞􏿽"
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS Encoding API basics
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Basics</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script>
// Legacy js-test.js checks of the Encoding API surface. Every expression under
// test is handed to the harness as a source string; for the
// shouldBeEqualToString calls the second argument is the expected string value.
description("This tests the basics of the Encoding API.");
// Both constructors are present on window.
shouldBeTrue("'TextEncoder' in window");
shouldBeTrue("'TextDecoder' in window");
// Instances have an "encoding" member, and it is a string.
shouldBeTrue("'encoding' in new TextEncoder");
shouldBeTrue("'encoding' in new TextDecoder");
shouldBeEqualToString("typeof (new TextEncoder).encoding", "string");
shouldBeEqualToString("typeof (new TextDecoder).encoding", "string");
// Instances have encode()/decode() members, and they are functions.
shouldBeTrue("'encode' in new TextEncoder");
shouldBeTrue("'decode' in new TextDecoder");
shouldBeEqualToString("typeof (new TextEncoder).encode", "function");
shouldBeEqualToString("typeof (new TextDecoder).decode", "function");
// Constructing without a label yields "utf-8".
shouldBeEqualToString("(new TextEncoder).encoding", "utf-8");
shouldBeEqualToString("(new TextDecoder).encoding", "utf-8");
// Copies the elements of |arrayLike| (for example a Uint8Array) into a plain
// Array, so the values can be serialized with JSON.stringify as a list.
function toArray(arrayLike) {
    return Array.prototype.slice.call(arrayLike);
}
// Logs a heading for |encoding|, then builds two source-text expressions for
// the js-test.js helpers: one that encodes |string| into the global `encoded`
// (whose bytes are compared with |bytes|), and one that decodes |bytes| (whose
// result is compared with |string|).
function testEncodeDecodeSample(encoding, string, bytes) {
    debug("");
    debug("test encode/decode sample - " + encoding);

    var quotedLabel = "'" + encoding + "'";
    var encodeSource = "encoded = new TextEncoder(" + quotedLabel + ").encode(" + JSON.stringify(string) + ")";
    evalAndLog(encodeSource);

    var bytesLiteral = JSON.stringify(bytes);
    shouldBeEqualToString("JSON.stringify(toArray(encoded))", bytesLiteral);

    var decodeSource = "new TextDecoder(" + quotedLabel + ").decode(new Uint8Array(" + bytesLiteral + "))";
    shouldBeEqualToString(decodeSource, string);
}
// Runs testEncodeDecodeSample once per UTF label, always with the same sample
// string and with the byte sequence expected for that label.
(function() {
    // z, cent, CJK water, G-Clef, Private-use character
    var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";

    var expectedBytesByLabel = [
        ["utf-8", [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD]],
        ["utf-16le", [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]],
        ["utf-16be", [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD]],
        ["utf-16", [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]]
    ];

    expectedBytesByLabel.forEach(function(entry) {
        testEncodeDecodeSample(entry[0], sample, entry[1]);
    });
})();
// One testharness.js test that checks the shape of TextEncoder/TextDecoder and
// then encodes and decodes a sample string under each UTF label.
test(function() {
    // Constructors are exposed on window.
    assert_true('TextEncoder' in window);
    assert_true('TextDecoder' in window);

    // Instances carry a string "encoding" member.
    assert_true('encoding' in new TextEncoder());
    assert_true('encoding' in new TextDecoder());
    assert_equals(typeof (new TextEncoder()).encoding, 'string');
    assert_equals(typeof (new TextDecoder()).encoding, 'string');

    // Instances carry callable encode()/decode() members.
    assert_true('encode' in new TextEncoder());
    assert_true('decode' in new TextDecoder());
    assert_equals(typeof (new TextEncoder()).encode, 'function');
    assert_equals(typeof (new TextDecoder()).decode, 'function');

    assert_equals((new TextEncoder()).encoding, 'utf-8', 'default encoding is utf-8');
    assert_equals((new TextDecoder()).encoding, 'utf-8', 'default encoding is utf-8');

    // z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
    // G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
    // byte-swapped BOM (non-character U+FFFE)
    var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';

    // Expected byte sequence for |sample| under each label, checked in order.
    var expectations = [
        {
            label: 'utf-8',
            bytes: [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE]
        },
        {
            label: 'utf-16le',
            bytes: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
        },
        {
            label: 'utf-16be',
            bytes: [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
        },
        {
            label: 'utf-16',
            bytes: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
        }
    ];

    expectations.forEach(function(expectation) {
        // Encode first, then decode the expected bytes back to the sample.
        var encoded = new TextEncoder(expectation.label).encode(sample);
        assert_array_equals(Array.prototype.slice.call(encoded), expectation.bytes);

        var roundTripped = new TextDecoder(expectation.label).decode(new Uint8Array(expectation.bytes));
        assert_equals(roundTripped, sample);
    });
}, 'Encoding API basics');
</script>
Test the Encoding API's handling of byte-order marks (BOMs).
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS new TextDecoder('utf-8').decode(new Uint8Array(utf8)) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16le').decode(new Uint8Array(utf16le)) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16be').decode(new Uint8Array(utf16be)) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8))) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le))) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be))) is "z¢水𝄞􏿽"
PASS new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8))) is not "z¢水𝄞􏿽"
PASS new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8))) is not "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le))) is not "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le))) is not "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be))) is not "z¢水𝄞􏿽"
PASS new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be))) is not "z¢水𝄞􏿽"
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS Byte-order marks: utf-8
PASS Byte-order marks: utf-16le
PASS Byte-order marks: utf-16be
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Byte-order marks</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script>
// Legacy js-test.js version of the byte-order-mark test.
// The fixtures below are top-level variables because the assertion strings
// further down refer to them by name (utf8, utf8_bom, ...); presumably the
// harness evaluates those strings in global scope - confirm in js-test.js.
description("Test the Encoding API's handling of byte-order marks (BOMs).");
// For each encoding: the BOM bytes, and the encoded form of |string| without a BOM.
var utf8_bom = [0xEF, 0xBB, 0xBF];
var utf8 = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD];
var utf16le_bom = [0xff, 0xfe];
var utf16le = [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF];
var utf16be_bom = [0xfe, 0xff];
var utf16be = [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD];
var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character
// missing BOMs: the bare byte sequence must decode to |string|
shouldBeEqualToString("new TextDecoder('utf-8').decode(new Uint8Array(utf8))", string);
shouldBeEqualToString("new TextDecoder('utf-16le').decode(new Uint8Array(utf16le))", string);
shouldBeEqualToString("new TextDecoder('utf-16be').decode(new Uint8Array(utf16be))", string);
// matching BOMs: prefixing the decoder's own BOM must still yield exactly |string|
shouldBeEqualToString("new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8)))", string);
shouldBeEqualToString("new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le)))", string);
shouldBeEqualToString("new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be)))", string);
// mismatching BOMs: prefixing another encoding's BOM must NOT yield |string|.
// Unlike shouldBeEqualToString above, shouldNotBe is given the expected value
// as source text, hence the JSON.stringify(string).
shouldNotBe("new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8)))", JSON.stringify(string));
shouldNotBe("new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8)))", JSON.stringify(string));
shouldNotBe("new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le)))", JSON.stringify(string));
shouldNotBe("new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le)))", JSON.stringify(string));
shouldNotBe("new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be)))", JSON.stringify(string));
shouldNotBe("new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be)))", JSON.stringify(string));
// FIXME: Add tests where the BOM is split across buffers.
// One entry per encoding: its BOM bytes and the encoded form of |string|
// without a BOM.
var testCases = [
    { encoding: 'utf-8', bom: [0xEF, 0xBB, 0xBF], bytes: [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD] },
    { encoding: 'utf-16le', bom: [0xff, 0xfe], bytes: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF] },
    { encoding: 'utf-16be', bom: [0xfe, 0xff], bytes: [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD] }
];

// z, cent, CJK water, G-Clef, Private-use character
var string = 'z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD';

// Registers one test per encoding. Each test reuses a single decoder for the
// sequence without a BOM, with its own BOM, and with every other encoding's BOM.
testCases.forEach(function(current) {
    var title = 'Byte-order marks: ' + current.encoding;

    test(function() {
        var decoder = new TextDecoder(current.encoding);

        // Decodes current.bytes preceded by the given BOM bytes.
        var decodeWithBom = function(bom) {
            return decoder.decode(new Uint8Array(bom.concat(current.bytes)));
        };

        assert_equals(decoder.decode(new Uint8Array(current.bytes)), string,
                      'Sequence without BOM should decode successfully');
        assert_equals(decodeWithBom(current.bom), string,
                      'Sequence with BOM should decode successfully (with no BOM present in output)');

        testCases
            .filter(function(other) { return other !== current; })
            .forEach(function(other) {
                assert_not_equals(decodeWithBom(other.bom), string,
                                  'Mismatching BOM should not be ignored - treated as garbage bytes.');
            });
    }, title);
});
</script>
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Encoding labels</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Test the Encoding API's use of encoding names");
encodings_table.forEach(function(section) {
var list = section.encodings;
list.forEach(function(encoding) {
debug("");
evalAndLog("name = " + JSON.stringify(encoding.name));
encoding.labels.forEach(function(label) {
shouldBeEqualToString("new TextDecoder(" + JSON.stringify(label) + ").encoding", encoding.name);
});
section.encodings.forEach(function(encoding) {
var name = encoding.name;
test(function(){
encoding.labels.forEach(function(label) {
assert_equals(new TextDecoder(label).encoding, encoding.name);
});
}, 'Labels for: ' + name);
});
});
......
Test the Encoding API's use of encoding names
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
Encoding names are case insensitive
PASS new TextDecoder('utf-8').encoding is "utf-8"
PASS new TextDecoder('UTF-8').encoding is "utf-8"
PASS new TextDecoder('utf-16').encoding is "utf-16le"
PASS new TextDecoder('UTF-16').encoding is "utf-16le"
PASS new TextDecoder('utf-16le').encoding is "utf-16le"
PASS new TextDecoder('UTF-16LE').encoding is "utf-16le"
PASS new TextDecoder('utf-16be').encoding is "utf-16be"
PASS new TextDecoder('UTF-16BE').encoding is "utf-16be"
PASS new TextDecoder('ascii').encoding is "windows-1252"
PASS new TextDecoder('ASCII').encoding is "windows-1252"
PASS new TextDecoder('iso-8859-1').encoding is "windows-1252"
PASS new TextDecoder('ISO-8859-1').encoding is "windows-1252"
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS Encoding labels are case-insensitive
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Encoding names</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
// Checks that TextDecoder accepts a handful of common labels in both lower
// and upper case and reports the expected encoding name for each.
// NOTE(review): this script no longer calls any js-test.js helper
// (description/debug/shouldBeEqualToString); the js-test.js include above
// looks removable - confirm before dropping it.
test(function() {
    var encodings = [
        { label: 'utf-8', encoding: 'utf-8' },
        { label: 'utf-16', encoding: 'utf-16le' },
        { label: 'utf-16le', encoding: 'utf-16le' },
        { label: 'utf-16be', encoding: 'utf-16be' },
        { label: 'ascii', encoding: 'windows-1252' },
        { label: 'iso-8859-1', encoding: 'windows-1252' }
    ];

    // encoding-labels.html tests the full set of names/labels; this test just
    // exercises some common cases and case-insensitivity.
    // The callback parameter is deliberately not called "test", so it cannot
    // shadow the harness's test() function.
    encodings.forEach(function(t) {
        assert_equals(new TextDecoder(t.label.toLowerCase()).encoding, t.encoding);
        assert_equals(new TextDecoder(t.label.toUpperCase()).encoding, t.encoding);
    });
}, 'Encoding labels are case-insensitive');
</script>
Edge cases around non-fatal errors at EOF
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xff])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8').decode(new Uint8Array([0xff])) is '�'
PASS new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-16le').decode(new Uint8Array([0x00])) is '�'
PASS new TextDecoder('utf-16be', {fatal: true}).decode(new Uint8Array([0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-16be').decode(new Uint8Array([0x00])) is '�'
Streaming cases:
decoder = new TextDecoder('utf-16le', {fatal: true})
odd = new Uint8Array([0x00])
even = new Uint8Array([0x00, 0x00])
PASS decoder.decode(odd, {stream: true}); decoder.decode(odd) did not throw exception.
PASS decoder.decode(even, {stream: true}); decoder.decode(odd) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS decoder.decode(odd, {stream: true}); decoder.decode(even) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS decoder.decode(even, {stream: true}); decoder.decode(even) did not throw exception.
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS Fatal flag, non-streaming cases
PASS Fatal flag, streaming cases
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: End-of-file</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script>
// Decoding errors at end of input, with and without the 'fatal' flag.
// NOTE(review): this script no longer calls any js-test.js helper
// (description/debug/evalAndLog/shouldThrow/...); the js-test.js include above
// looks removable - confirm before dropping it.

// Non-streaming: a truncated sequence throws an EncodingError when
// fatal is set, and decodes to U+FFFD when it is not.
test(function() {
    [
        {encoding: 'utf-8', sequence: [0xC0]},
        {encoding: 'utf-16le', sequence: [0x00]},
        {encoding: 'utf-16be', sequence: [0x00]}
    ].forEach(function(testCase) {
        assert_throws({name: 'EncodingError'}, function() {
            var decoder = new TextDecoder(testCase.encoding, {fatal: true});
            decoder.decode(new Uint8Array(testCase.sequence));
        }, 'Unterminated ' + testCase.encoding + ' sequence should throw if fatal flag is set');

        // Pass the byte list itself. Wrapping it in another array only
        // worked because the inner array was coerced to a number.
        assert_equals(
            new TextDecoder(testCase.encoding).decode(new Uint8Array(testCase.sequence)),
            '\uFFFD',
            'Unterminated ' + testCase.encoding + ' sequence should emit replacement character if fatal flag is unset');
    });
}, 'Fatal flag, non-streaming cases');

// Streaming: one fatal utf-16le decoder is fed chunks of one byte ("odd") and
// two bytes ("even"); only the total length across a streamed pair matters.
test(function() {
    var decoder = new TextDecoder('utf-16le', {fatal: true});
    var odd = new Uint8Array([0x00]);
    var even = new Uint8Array([0x00, 0x00]);

    // odd + odd: the first byte is held back, the second completes U+0000.
    assert_equals(decoder.decode(odd, {stream: true}), '');
    assert_equals(decoder.decode(odd), '\u0000');

    // even + odd: the final call ends with a dangling byte.
    assert_throws({name: 'EncodingError'}, function() {
        decoder.decode(even, {stream: true});
        decoder.decode(odd);
    });

    // odd + even: three bytes in total, so a dangling byte remains.
    assert_throws({name: 'EncodingError'}, function() {
        decoder.decode(odd, {stream: true});
        decoder.decode(even);
    });

    // even + even: each chunk is a complete code unit.
    assert_equals(decoder.decode(even, {stream: true}), '\u0000');
    assert_equals(decoder.decode(even), '\u0000');
}, 'Fatal flag, streaming cases');
</script>
Test the Encoding API's 'fatal' flag
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xC0])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xC0, 0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xC0, 0xC0])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xE0])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xE0, 0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xE0, 0xC0])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xE0, 0x80, 0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xE0, 0x80, 0xC0])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
PASS new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00])) threw exception EncodingError: Failed to execute 'decode' on 'TextDecoder': The encoded data was not valid..
FAIL new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00, 0xd8])) should throw an exception. Was .
FAIL new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00, 0xd8, 0x00, 0x00])) should throw an exception. Was \0.
FAIL new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00, 0xdc, 0x00, 0x00])) should throw an exception. Was \0.
FAIL new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00, 0xdc, 0x00, 0xd8])) should throw an exception. Was .
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS Fatal flag: utf-8 - invalid code
PASS Fatal flag: utf-8 - ends early
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - ends early
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - invalid trail
PASS Fatal flag: utf-8 - > 0x10FFFF
PASS Fatal flag: utf-8 - obsolete lead byte
PASS Fatal flag: utf-8 - overlong U+0000 - 2 bytes
PASS Fatal flag: utf-8 - overlong U+0000 - 3 bytes
PASS Fatal flag: utf-8 - overlong U+0000 - 4 bytes
PASS Fatal flag: utf-8 - overlong U+0000 - 5 bytes
PASS Fatal flag: utf-8 - overlong U+0000 - 6 bytes
PASS Fatal flag: utf-8 - overlong U+007F - 2 bytes
PASS Fatal flag: utf-8 - overlong U+007F - 3 bytes
PASS Fatal flag: utf-8 - overlong U+007F - 4 bytes
PASS Fatal flag: utf-8 - overlong U+007F - 5 bytes
PASS Fatal flag: utf-8 - overlong U+007F - 6 bytes
PASS Fatal flag: utf-8 - overlong U+07FF - 3 bytes
PASS Fatal flag: utf-8 - overlong U+07FF - 4 bytes
PASS Fatal flag: utf-8 - overlong U+07FF - 5 bytes
PASS Fatal flag: utf-8 - overlong U+07FF - 6 bytes
PASS Fatal flag: utf-8 - overlong U+FFFF - 4 bytes
PASS Fatal flag: utf-8 - overlong U+FFFF - 5 bytes
PASS Fatal flag: utf-8 - overlong U+FFFF - 6 bytes
PASS Fatal flag: utf-8 - overlong U+10FFFF - 5 bytes
PASS Fatal flag: utf-8 - overlong U+10FFFF - 6 bytes
PASS Fatal flag: utf-8 - lead surrogate
PASS Fatal flag: utf-8 - trail surrogate
PASS Fatal flag: utf-8 - surrogate pair
PASS Fatal flag: utf-16le - truncated code unit
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Fatal flag</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Test the Encoding API's 'fatal' flag");
var bad = [
{ encoding: 'utf-8', input: "[0xC0]" }, // ends early
{ encoding: 'utf-8', input: "[0xC0, 0x00]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xC0, 0xC0]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xE0]" }, // ends early
{ encoding: 'utf-8', input: "[0xE0, 0x00]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xE0, 0xC0]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xE0, 0x80, 0x00]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xE0, 0x80, 0xC0]" }, // invalid trail
{ encoding: 'utf-8', input: "[0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]" }, // > 0x10FFFF
{ encoding: 'utf-16le', input: "[0x00]" }, // truncated code unit
{ encoding: 'utf-16le', input: "[0x00, 0xd8]" }, // surrogate half
{ encoding: 'utf-16le', input: "[0x00, 0xd8, 0x00, 0x00]" }, // surrogate half
{ encoding: 'utf-16le', input: "[0x00, 0xdc, 0x00, 0x00]" }, // trail surrogate
{ encoding: 'utf-16le', input: "[0x00, 0xdc, 0x00, 0xd8]" } // swapped surrogates
// FIXME: Legacy encoding cases
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' },
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xE0], name: 'ends early' },
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' },
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },
// Overlong encodings
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], name: 'overlong U+007F - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 6 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], name: 'overlong U+07FF - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 6 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 6 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 6 bytes' },
// UTF-16 surrogates encoded as code points in UTF-8
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], name: 'surrogate pair' },
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
// FIXME: Add legacy encoding cases
];
bad.forEach(function(t) {
shouldThrow("new TextDecoder('" + t.encoding + "', {fatal: true}).decode(new Uint8Array(" + t.input + "))");
test(function() {
assert_throws({name: 'EncodingError'}, function() {
new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
});
}, 'Fatal flag: ' + t.encoding + " - " + t.name);
});
</script>
Verify that Latin-1 decoders (windows-1252, iso-8859-1, us-ascii, etc) decode identically.
This is a testharness.js-based test.
PASS Latin-1 decoders (windows-1252, iso-8859-1, us-ascii, etc) decode identically.
Harness: the test ran to completion.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
array = new Uint8Array(256)
initialize array to 0...255
windows1252 = new TextDecoder('windows-1252')
decoder = new TextDecoder("ansi_x3.4-1968")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("ascii")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("cp1252")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("cp819")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("csisolatin1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("ibm819")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso-8859-1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso-ir-100")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso8859-1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso88591")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso_8859-1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("iso_8859-1:1987")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("l1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("latin1")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("us-ascii")
PASS decoder.decode(array) is windows1252.decode(array)
decoder = new TextDecoder("x-cp1252")
PASS decoder.decode(array) is windows1252.decode(array)
PASS successfullyParsed is true
TEST COMPLETE
B
B<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<!DOCTYPE html>
<title>Encoding API: Latin-1 decoders</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Verify that Latin-1 decoders (windows-1252, iso-8859-1, us-ascii, etc) decode identically.");
// Blink uses separate decoder object instances for these encoding aliases,
// so test that they are behaving identically.
var labels;
encodings_table.forEach(function(section) {
section.encodings.forEach(function(encoding) {
if (encoding.name === "windows-1252")
labels = encoding.labels;
test(function() {
var labels;
encodings_table.forEach(function(section) {
section.encodings.forEach(function(encoding) {
if (encoding.name === 'windows-1252')
labels = encoding.labels;
});
});
});
labels = labels.filter(function(label) { return label !== 'windows-1252'; });
evalAndLog("array = new Uint8Array(256)");
debug("initialize array to 0...255");
for (var cp = 0; cp <= 255; ++cp) {
array[cp] = cp;
}
evalAndLog("windows1252 = new TextDecoder('windows-1252')");
labels.forEach(function(label) {
decoder = null;
evalAndLog("decoder = new TextDecoder(" + JSON.stringify(label) + ")");
// Above may throw if encoding unsupported.
if (decoder) {
shouldBe("decoder.decode(array)", "windows1252.decode(array)");
labels = labels.filter(function(label) { return label !== 'windows-1252'; });
var array = new Uint8Array(256);
for (var cp = 0; cp <= 255; ++cp) {
array[cp] = cp;
}
});
var windows1252 = new TextDecoder('windows-1252');
labels.forEach(function(label) {
var decoder = new TextDecoder(label);
assert_equals(decoder.decode(array), windows1252.decode(array));
});
}, 'Latin-1 decoders (windows-1252, iso-8859-1, us-ascii, etc) decode identically.');
</script>
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Legacy encodings</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Non-UTF encodings supported only for decode, not encode");
encodings_table.forEach(function(section) {
section.encodings.forEach(function(encoding) {
if (encoding.name === "replacement")
return;
if (utf_encodings.indexOf(encoding.name) !== -1) {
shouldBeEqualToString("new TextDecoder(" + JSON.stringify(encoding.name) + ").encoding", encoding.name);
shouldBeEqualToString("new TextEncoder(" + JSON.stringify(encoding.name) + ").encoding", encoding.name);
test(function() {
assert_equals(new TextDecoder(encoding.name).encoding, encoding.name);
assert_equals(new TextEncoder(encoding.name).encoding, encoding.name);
}, "UTF encodings are supported for encode and decode");
} else {
shouldBeEqualToString("new TextDecoder(" + JSON.stringify(encoding.name) + ").encoding", encoding.name);
shouldThrow("new TextEncoder(" + JSON.stringify(encoding.name) + ").encoding");
test(function() {
assert_equals(new TextDecoder(encoding.name).encoding, encoding.name);
assert_throws({name:'TypeError'}, function() { new TextEncoder(encoding.name); });
}, "Non-UTF encodings supported only for decode, not encode");
}
});
});
......
......@@ -18,6 +18,7 @@ encodings_table = [
},
{
"encodings": [
/* FIXME: Support ibm866 (crbug.com/277023)
{
"labels": [
"866",
......@@ -27,6 +28,7 @@ encodings_table = [
],
"name": "ibm866"
},
*/
{
"labels": [
"csisolatin2",
......
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: Streaming decode</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Test streaming decoding using the Encoding API.");
evalAndLog("string = '\\x00123ABCabc\\x80\\xFF\\u0100\\u1000\\uFFFD\\uD800\\uDC00\\uDBFF\\uDFFF'");
// Sample text for the streaming round trip: NUL, ASCII, Latin-1, BMP code
// points, U+FFFD, and the first and last astral code points (as surrogate
// pairs). The literal is used directly rather than being eval'd, so each
// escape takes a single backslash and denotes the character itself.
var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
utf_encodings.forEach(function (encoding) {
debug("");
evalAndLog("encoded = new TextEncoder('" + encoding + "').encode(string)");
for (var len = 1; len <= 5; ++len) {
evalAndLog("out = ''");
evalAndLog("decoder = new TextDecoder('" + encoding + "')");
for (var i = 0; i < encoded.length; i += len) {
var sub = [];
for (var j = i; j < encoded.length && j < i + len; ++j) {
sub.push(encoded[j]);
test(function() {
var encoded = new TextEncoder(encoding).encode(string);
var out = '';
var decoder = new TextDecoder(encoding);
for (var i = 0; i < encoded.length; i += len) {
var sub = [];
for (var j = i; j < encoded.length && j < i + len; ++j)
sub.push(encoded[j]);
out += decoder.decode(new Uint8Array(sub), {stream: true});
}
evalAndLog("out += decoder.decode(new Uint8Array(" + JSON.stringify(sub) + "), {stream: true})");
}
evalAndLog("out += decoder.decode()");
shouldBeEqualToString("out", string);
out += decoder.decode();
assert_equals(out, string);
}, 'Streaming decode: ' + encoding + ', ' + len + ' byte window');
}
});
......
Test invalid UTF-16 surrogate pairs with UTF-8 encoding
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
encoded = new TextEncoder('utf-8').encode('abc123')
PASS JSON.stringify([].slice.call(encoded)) is "[97,98,99,49,50,51]"
encoded = new TextEncoder('utf-8').encode('\ud800')
PASS JSON.stringify([].slice.call(encoded)) is "[239,191,189]"
encoded = new TextEncoder('utf-8').encode('\udc00')
PASS JSON.stringify([].slice.call(encoded)) is "[239,191,189]"
encoded = new TextEncoder('utf-8').encode('abc\ud800123')
PASS JSON.stringify([].slice.call(encoded)) is "[97,98,99,239,191,189,49,50,51]"
encoded = new TextEncoder('utf-8').encode('abc\udc00123')
PASS JSON.stringify([].slice.call(encoded)) is "[97,98,99,239,191,189,49,50,51]"
encoded = new TextEncoder('utf-8').encode('\udc00\ud800')
PASS JSON.stringify([].slice.call(encoded)) is "[239,191,189,239,191,189]"
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<script>
description("Test invalid UTF-16 surrogate pairs with UTF-8 encoding");

// Each `input` is JavaScript source text for a string literal; it is spliced
// into the statement handed to evalAndLog below. `expected` holds the UTF-8
// bytes the encoder should produce for that string.
var badStrings = [
    // Sanity check.
    { input: "'abc123'", expected: [97, 98, 99, 49, 50, 51] },
    // Surrogate halves on their own.
    { input: "'\\ud800'", expected: [0xef, 0xbf, 0xbd] },
    { input: "'\\udc00'", expected: [0xef, 0xbf, 0xbd] },
    // Surrogate halves embedded in ASCII text.
    { input: "'abc\\ud800123'", expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33] },
    { input: "'abc\\udc00123'", expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33] },
    // Both halves present but in the wrong order.
    { input: "'\\udc00\\ud800'", expected: [239, 191, 189, 239, 191, 189] }
];

badStrings.forEach(function(testCase) {
    // Encode the string, then compare the bytes (serialized as JSON) with
    // the expected byte list.
    var encodeStatement = "encoded = new TextEncoder('utf-8').encode(" + testCase.input + ")";
    evalAndLog(encodeStatement);
    var expectedJson = JSON.stringify(testCase.expected);
    shouldBeEqualToString("JSON.stringify([].slice.call(encoded))", expectedJson);
    debug("");
});
</script>
Sanity check the Encoding API's handling of UTF encodings.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
utf-8 - Encode/Decode Range U+0000 - U+10FFFF
no output means all ranges matched
utf-16le - Encode/Decode Range U+0000 - U+10FFFF
no output means all ranges matched
utf-16be - Encode/Decode Range U+0000 - U+10FFFF
no output means all ranges matched
UTF-8 encoding (compare against unescape/encodeURIComponent)
no output means all ranges matched
UTF-8 decoding (compare against decodeURIComponent/escape)
no output means all ranges matched
PASS successfullyParsed is true
TEST COMPLETE
This is a testharness.js-based test.
PASS utf-8 - encode/decode round trip
PASS utf-16le - encode/decode round trip
PASS utf-16be - encode/decode round trip
PASS UTF-8 encoding (compare against unescape/encodeURIComponent)
PASS UTF-8 decoding (compare against decodeURIComponent/escape)
Harness: the test ran to completion.
<!DOCTYPE html>
<script src="../../../resources/js-test.js"></script>
<title>Encoding API: UTF encoding round trips</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
description("Sanity check the Encoding API's handling of UTF encodings.");
BATCH_SIZE = 0x1000; // Convert in batches spanning this made code points.
SKIP_SIZE = 0x77; // For efficiency, don't test every code point.
quiet = true; // Don't log every matching range.
var BATCH_SIZE = 0x1000; // Convert in batches spanning this many code points.
var SKIP_SIZE = 0x77; // For efficiency, don't test every code point.
function fromCodePoint(cp) {
if (0xd800 <= cp && cp <= 0xdfff) throw new Error('Invalid code point');
if (0xD800 <= cp && cp <= 0xDFFF) throw new Error('Invalid code point');
if (cp > 0xffff) {
// outside BMP - encode as surrogate pair
return String.fromCharCode(0xd800 + ((cp >> 10) & 0x3ff), 0xdc00 + (cp & 0x3ff));
}
return String.fromCharCode(i);
if (cp <= 0xFFFF)
return String.fromCharCode(cp);
// outside BMP - encode as surrogate pair
return String.fromCharCode(0xD800 + ((cp >> 10) & 0x3FF), 0xDC00 + (cp & 0x3FF));
}
function makeBatch(cp) {
var string = '';
for (var i = cp; i < cp + BATCH_SIZE && cp < 0x10FFFF; i += SKIP_SIZE) {
if (0xd800 <= i && i <= 0xdfff) {
if (0xD800 <= i && i <= 0xDFFF) {
// surrogate half
continue;
}
......@@ -31,28 +30,15 @@ function makeBatch(cp) {
return string;
}
function testEncodeDecode(encoding, min, max) {
debug(encoding + " - Encode/Decode Range " + cpname(min) + " - " + cpname(max));
function cpname(n) {
return 'U+' + ((n <= 0xFFFF) ?
('0000' + n.toString(16).toUpperCase()).slice(-4) :
n.toString(16).toUpperCase());
}
for (i = min; i < max; i += BATCH_SIZE) {
string = makeBatch(i);
encoded = new TextEncoder(encoding).encode(string);
decoded = new TextDecoder(encoding).decode(encoded);
shouldBe("string", "decoded", quiet);
}
debug("no output means all ranges matched");
debug("");
}
utf_encodings.forEach(function(encoding) {
testEncodeDecode(encoding, 0, 0x10FFFF);
test(function() {
for (var i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
var string = makeBatch(i);
var encoded = new TextEncoder(encoding).encode(string);
var decoded = new TextDecoder(encoding).decode(encoded);
assert_equals(decoded, string);
}
}, encoding + ' - encode/decode round trip');
});
......@@ -60,10 +46,9 @@ utf_encodings.forEach(function(encoding) {
// http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
function encode_utf8(string) {
var utf8 = unescape(encodeURIComponent(string));
var octets = [], i;
for (i = 0; i < utf8.length; i += 1) {
var octets = [];
for (var i = 0; i < utf8.length; i += 1)
octets.push(utf8.charCodeAt(i));
}
return octets;
}
......@@ -72,25 +57,23 @@ function decode_utf8(octets) {
return decodeURIComponent(escape(utf8));
}
debug("UTF-8 encoding (compare against unescape/encodeURIComponent)");
for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
str = makeBatch(i);
expected = encode_utf8(str);
actual = new TextEncoder('UTF-8').encode(str);
shouldBe("actual", "expected", quiet);
}
debug("no output means all ranges matched");
debug("");
debug("UTF-8 decoding (compare against decodeURIComponent/escape)");
for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
str = makeBatch(i);
encoded = encode_utf8(str);
expected = decode_utf8(encoded);
actual = new TextDecoder('UTF-8').decode(new Uint8Array(encoded));
shouldBe("actual", "expected", quiet);
}
debug("no output means all ranges matched");
debug("");
// Compare TextEncoder's UTF-8 output with the bytes produced by the
// encode_utf8() helper, one batch of code points at a time.
test(function() {
    var start = 0;
    while (start < 0x10FFFF) {
        var sample = makeBatch(start);
        var referenceOctets = encode_utf8(sample);
        var encoderOctets = new TextEncoder('UTF-8').encode(sample);
        assert_array_equals(encoderOctets, referenceOctets);
        start += BATCH_SIZE;
    }
}, 'UTF-8 encoding (compare against unescape/encodeURIComponent)');

// Compare TextDecoder's UTF-8 output with the string produced by the
// decode_utf8() helper for the same octets, one batch at a time.
test(function() {
    var start = 0;
    while (start < 0x10FFFF) {
        var sample = makeBatch(start);
        var octets = encode_utf8(sample);
        var referenceText = decode_utf8(octets);
        var decoderText = new TextDecoder('UTF-8').decode(new Uint8Array(octets));
        assert_equals(decoderText, referenceText);
        start += BATCH_SIZE;
    }
}, 'UTF-8 decoding (compare against decodeURIComponent/escape)');
</script>
This is a testharness.js-based test.
FAIL utf-16le - lone surrogate lead assert_equals: expected "�" but got ""
FAIL utf-16le - lone surrogate lead (fatal flag set) assert_throws: function "function () {
new TextDecoder(t.encoding, {fa..." did not throw
FAIL utf-16le - lone surrogate trail assert_equals: expected "�" but got ""
FAIL utf-16le - lone surrogate trail (fatal flag set) assert_throws: function "function () {
new TextDecoder(t.encoding, {fa..." did not throw
FAIL utf-16le - unmatched surrogate lead assert_equals: expected "�\0" but got "\0"
FAIL utf-16le - unmatched surrogate lead (fatal flag set) assert_throws: function "function () {
new TextDecoder(t.encoding, {fa..." did not throw
FAIL utf-16le - unmatched surrogate trail assert_equals: expected "�\0" but got "\0"
FAIL utf-16le - unmatched surrogate trail (fatal flag set) assert_throws: function "function () {
new TextDecoder(t.encoding, {fa..." did not throw
FAIL utf-16le - swapped surrogate pair assert_equals: expected "��" but got ""
FAIL utf-16le - swapped surrogate pair (fatal flag set) assert_throws: function "function () {
new TextDecoder(t.encoding, {fa..." did not throw
Harness: the test ran to completion.
<!DOCTYPE html>
<title>Encoding API: UTF-16 surrogate handling</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script src="resources/shared.js"></script>
<script>
// Malformed UTF-16LE inputs. Each entry gives the encoding label, the raw
// input bytes (little-endian, so [0x00, 0xd8] is the code unit 0xD800), the
// string expected from a decode without the fatal flag, and a short name that
// becomes part of the test title.
var bad = [
{
encoding: 'utf-16le',
input: [0x00, 0xd8],
expected: '\uFFFD',
name: 'lone surrogate lead'
},
{
encoding: 'utf-16le',
input: [0x00, 0xdc],
expected: '\uFFFD',
name: 'lone surrogate trail'
},
{
encoding: 'utf-16le',
input: [0x00, 0xd8, 0x00, 0x00],
expected: '\uFFFD\u0000',
name: 'unmatched surrogate lead'
},
{
encoding: 'utf-16le',
input: [0x00, 0xdc, 0x00, 0x00],
expected: '\uFFFD\u0000',
name: 'unmatched surrogate trail'
},
{
encoding: 'utf-16le',
input: [0x00, 0xdc, 0x00, 0xd8],
expected: '\uFFFD\uFFFD',
name: 'swapped surrogate pair'
}
];
// Register two tests per entry: a default decode whose result is compared
// with `expected`, and a decode with {fatal: true} that must throw an
// EncodingError.
bad.forEach(function(t) {
test(function() {
assert_equals(new TextDecoder(t.encoding).decode(new Uint8Array(t.input)), t.expected);
}, t.encoding + " - " + t.name);
// NOTE(review): the harness appears to quote the callback's source text in
// its "did not throw" failure message, so keep the callback below unchanged
// unless the recorded expectations are updated as well.
test(function() {
assert_throws({name: 'EncodingError'}, function() {
new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
});
}, t.encoding + " - " + t.name + ' (fatal flag set)');
});
</script>
This is a testharness.js-based test.
PASS Invalid surrogates encoded into UTF-8: Sanity check
PASS Invalid surrogates encoded into UTF-8: Surrogate half (low)
PASS Invalid surrogates encoded into UTF-8: Surrogate half (high)
PASS Invalid surrogates encoded into UTF-8: Surrogate half (low), in a string
PASS Invalid surrogates encoded into UTF-8: Surrogate half (high), in a string
PASS Invalid surrogates encoded into UTF-8: Wrong order
Harness: the test ran to completion.
<!DOCTYPE html>
<title>Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding</title>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<script>
// Strings containing unpaired or misordered UTF-16 surrogates. `expected` is
// the UTF-8 byte sequence the encoder should emit for `input`, `decoded` is
// the string obtained by decoding those bytes again, and `name` is appended
// to the test title.
// NOTE(review): U+D800 lies in the lead (high) surrogate range and U+DC00 in
// the trail (low) range, so the "(low)"/"(high)" names look swapped. They are
// kept unchanged here because they are part of the reported test titles.
var badStrings = [
    { input: 'abc123', expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33], decoded: 'abc123', name: 'Sanity check' },
    { input: '\uD800', expected: [0xef, 0xbf, 0xbd], decoded: '\uFFFD', name: 'Surrogate half (low)' },
    { input: '\uDC00', expected: [0xef, 0xbf, 0xbd], decoded: '\uFFFD', name: 'Surrogate half (high)' },
    {
        input: 'abc\uD800123',
        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
        decoded: 'abc\uFFFD123',
        name: 'Surrogate half (low), in a string'
    },
    {
        input: 'abc\uDC00123',
        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
        decoded: 'abc\uFFFD123',
        name: 'Surrogate half (high), in a string'
    },
    { input: '\uDC00\uD800', expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd], decoded: '\uFFFD\uFFFD', name: 'Wrong order' }
];

// One test per entry: check the encoded bytes first, then check that decoding
// those same bytes gives the expected replacement-character string.
badStrings.forEach(function(testCase) {
    var title = 'Invalid surrogates encoded into UTF-8: ' + testCase.name;
    test(function() {
        var bytes = new TextEncoder('utf-8').encode(testCase.input);
        var byteList = Array.prototype.slice.call(bytes);
        assert_array_equals(byteList, testCase.expected);
        var roundTripped = new TextDecoder('utf-8').decode(bytes);
        assert_equals(roundTripped, testCase.decoded);
    }, title);
});
</script>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment