Commit 83808a91 authored by jsbell@chromium.org

Encoding API: Update tests and simplify constructors

Add a snapshot of the WHATWG Encoding spec's encodings.json and document
the deltas from it in shared.js. Skip testing "replacement" encodings.

Also, simplify the constructors by just passing through a TextEncoding
object rather than digging out a string then recreating it.

No functional changes.

BUG=277037
R=jshin@chromium.org

Review URL: https://codereview.chromium.org/269593009

git-svn-id: svn://svn.chromium.org/blink/trunk@173754 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent 77cff3ba
......@@ -28,13 +28,11 @@ PASS ASCII superset encoding: windows-1258
PASS ASCII superset encoding: x-mac-cyrillic
PASS ASCII superset encoding: gbk
PASS ASCII superset encoding: gb18030
PASS ASCII superset encoding: hz-gb-2312
PASS ASCII superset encoding: big5
PASS ASCII superset encoding: euc-jp
FAIL ASCII superset encoding: iso-2022-jp assert_equals: expected "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" but got "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r��\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
FAIL ASCII superset encoding: shift_jis assert_equals: expected "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" but got "\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1c\x1b\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x1a"
PASS ASCII superset encoding: euc-kr
PASS ASCII superset encoding: iso-2022-kr
PASS ASCII superset encoding: x-user-defined
Harness: the test ran to completion.
......@@ -7,9 +7,7 @@
// Encodings that have escape codes in 0x00-0x7F
var escape_codes = {
'hz-gb-2312': [ 0x7E ],
'iso-2022-jp': [ 0x1B ],
'iso-2022-kr': [ 0x0E, 0x0F, 0x1B ]
'iso-2022-jp': [ 0x1B ]
};
encodings_table.forEach(function(section) {
......
......@@ -29,14 +29,11 @@ PASS Labels for: windows-1258
PASS Labels for: x-mac-cyrillic
PASS Labels for: gbk
PASS Labels for: gb18030
PASS Labels for: hz-gb-2312
FAIL Labels for: big5 assert_equals: expected "big5" but got "big5-hkscs"
PASS Labels for: big5
PASS Labels for: euc-jp
PASS Labels for: iso-2022-jp
PASS Labels for: shift_jis
PASS Labels for: euc-kr
PASS Labels for: iso-2022-kr
FAIL Labels for: replacement assert_equals: expected "replacement" but got "iso-2022-cn"
PASS Labels for: utf-16be
PASS Labels for: utf-16le
PASS Labels for: x-user-defined
......
......@@ -6,7 +6,9 @@
<script>
encodings_table.forEach(function(section) {
section.encodings.forEach(function(encoding) {
section.encodings.filter(function(encoding) {
return encoding.name !== 'replacement';
}).forEach(function(encoding) {
var name = encoding.name;
test(function(){
encoding.labels.forEach(function(label) {
......
......@@ -29,13 +29,11 @@ PASS Non-UTF encodings supported only for decode, not encode: windows-1258
PASS Non-UTF encodings supported only for decode, not encode: x-mac-cyrillic
PASS Non-UTF encodings supported only for decode, not encode: gbk
PASS Non-UTF encodings supported only for decode, not encode: gb18030
PASS Non-UTF encodings supported only for decode, not encode: hz-gb-2312
PASS Non-UTF encodings supported only for decode, not encode: big5
PASS Non-UTF encodings supported only for decode, not encode: euc-jp
PASS Non-UTF encodings supported only for decode, not encode: iso-2022-jp
PASS Non-UTF encodings supported only for decode, not encode: shift_jis
PASS Non-UTF encodings supported only for decode, not encode: euc-kr
PASS Non-UTF encodings supported only for decode, not encode: iso-2022-kr
PASS UTF encodings are supported for encode and decode: utf-16be
PASS UTF encodings are supported for encode and decode: utf-16le
PASS Non-UTF encodings supported only for decode, not encode: x-user-defined
......
[
{
"encodings": [
{
"labels": [
"unicode-1-1-utf-8",
"utf-8",
"utf8"
],
"name": "utf-8"
}
],
"heading": "The Encoding"
},
{
"encodings": [
{
"labels": [
"866",
"cp866",
"csibm866",
"ibm866"
],
"name": "ibm866"
},
{
"labels": [
"csisolatin2",
"iso-8859-2",
"iso-ir-101",
"iso8859-2",
"iso88592",
"iso_8859-2",
"iso_8859-2:1987",
"l2",
"latin2"
],
"name": "iso-8859-2"
},
{
"labels": [
"csisolatin3",
"iso-8859-3",
"iso-ir-109",
"iso8859-3",
"iso88593",
"iso_8859-3",
"iso_8859-3:1988",
"l3",
"latin3"
],
"name": "iso-8859-3"
},
{
"labels": [
"csisolatin4",
"iso-8859-4",
"iso-ir-110",
"iso8859-4",
"iso88594",
"iso_8859-4",
"iso_8859-4:1988",
"l4",
"latin4"
],
"name": "iso-8859-4"
},
{
"labels": [
"csisolatincyrillic",
"cyrillic",
"iso-8859-5",
"iso-ir-144",
"iso8859-5",
"iso88595",
"iso_8859-5",
"iso_8859-5:1988"
],
"name": "iso-8859-5"
},
{
"labels": [
"arabic",
"asmo-708",
"csiso88596e",
"csiso88596i",
"csisolatinarabic",
"ecma-114",
"iso-8859-6",
"iso-8859-6-e",
"iso-8859-6-i",
"iso-ir-127",
"iso8859-6",
"iso88596",
"iso_8859-6",
"iso_8859-6:1987"
],
"name": "iso-8859-6"
},
{
"labels": [
"csisolatingreek",
"ecma-118",
"elot_928",
"greek",
"greek8",
"iso-8859-7",
"iso-ir-126",
"iso8859-7",
"iso88597",
"iso_8859-7",
"iso_8859-7:1987",
"sun_eu_greek"
],
"name": "iso-8859-7"
},
{
"labels": [
"csiso88598e",
"csisolatinhebrew",
"hebrew",
"iso-8859-8",
"iso-8859-8-e",
"iso-ir-138",
"iso8859-8",
"iso88598",
"iso_8859-8",
"iso_8859-8:1988",
"visual"
],
"name": "iso-8859-8"
},
{
"labels": [
"csiso88598i",
"iso-8859-8-i",
"logical"
],
"name": "iso-8859-8-i"
},
{
"labels": [
"csisolatin6",
"iso-8859-10",
"iso-ir-157",
"iso8859-10",
"iso885910",
"l6",
"latin6"
],
"name": "iso-8859-10"
},
{
"labels": [
"iso-8859-13",
"iso8859-13",
"iso885913"
],
"name": "iso-8859-13"
},
{
"labels": [
"iso-8859-14",
"iso8859-14",
"iso885914"
],
"name": "iso-8859-14"
},
{
"labels": [
"csisolatin9",
"iso-8859-15",
"iso8859-15",
"iso885915",
"iso_8859-15",
"l9"
],
"name": "iso-8859-15"
},
{
"labels": [
"iso-8859-16"
],
"name": "iso-8859-16"
},
{
"labels": [
"cskoi8r",
"koi",
"koi8",
"koi8-r",
"koi8_r"
],
"name": "koi8-r"
},
{
"labels": [
"koi8-u"
],
"name": "koi8-u"
},
{
"labels": [
"csmacintosh",
"mac",
"macintosh",
"x-mac-roman"
],
"name": "macintosh"
},
{
"labels": [
"dos-874",
"iso-8859-11",
"iso8859-11",
"iso885911",
"tis-620",
"windows-874"
],
"name": "windows-874"
},
{
"labels": [
"cp1250",
"windows-1250",
"x-cp1250"
],
"name": "windows-1250"
},
{
"labels": [
"cp1251",
"windows-1251",
"x-cp1251"
],
"name": "windows-1251"
},
{
"labels": [
"ansi_x3.4-1968",
"ascii",
"cp1252",
"cp819",
"csisolatin1",
"ibm819",
"iso-8859-1",
"iso-ir-100",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"us-ascii",
"windows-1252",
"x-cp1252"
],
"name": "windows-1252"
},
{
"labels": [
"cp1253",
"windows-1253",
"x-cp1253"
],
"name": "windows-1253"
},
{
"labels": [
"cp1254",
"csisolatin5",
"iso-8859-9",
"iso-ir-148",
"iso8859-9",
"iso88599",
"iso_8859-9",
"iso_8859-9:1989",
"l5",
"latin5",
"windows-1254",
"x-cp1254"
],
"name": "windows-1254"
},
{
"labels": [
"cp1255",
"windows-1255",
"x-cp1255"
],
"name": "windows-1255"
},
{
"labels": [
"cp1256",
"windows-1256",
"x-cp1256"
],
"name": "windows-1256"
},
{
"labels": [
"cp1257",
"windows-1257",
"x-cp1257"
],
"name": "windows-1257"
},
{
"labels": [
"cp1258",
"windows-1258",
"x-cp1258"
],
"name": "windows-1258"
},
{
"labels": [
"x-mac-cyrillic",
"x-mac-ukrainian"
],
"name": "x-mac-cyrillic"
}
],
"heading": "Legacy single-byte encodings"
},
{
"encodings": [
{
"labels": [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb18030",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk"
],
"name": "gb18030"
},
{
"labels": [
"hz-gb-2312"
],
"name": "hz-gb-2312"
}
],
"heading": "Legacy multi-byte Chinese (simplified) encodings"
},
{
"encodings": [
{
"labels": [
"big5",
"big5-hkscs",
"cn-big5",
"csbig5",
"x-x-big5"
],
"name": "big5"
}
],
"heading": "Legacy multi-byte Chinese (traditional) encodings"
},
{
"encodings": [
{
"labels": [
"cseucpkdfmtjapanese",
"euc-jp",
"x-euc-jp"
],
"name": "euc-jp"
},
{
"labels": [
"csiso2022jp",
"iso-2022-jp"
],
"name": "iso-2022-jp"
},
{
"labels": [
"csshiftjis",
"ms_kanji",
"shift-jis",
"shift_jis",
"sjis",
"windows-31j",
"x-sjis"
],
"name": "shift_jis"
}
],
"heading": "Legacy multi-byte Japanese encodings"
},
{
"encodings": [
{
"labels": [
"cseuckr",
"csksc56011987",
"euc-kr",
"iso-ir-149",
"korean",
"ks_c_5601-1987",
"ks_c_5601-1989",
"ksc5601",
"ksc_5601",
"windows-949"
],
"name": "euc-kr"
}
],
"heading": "Legacy multi-byte Korean encodings"
},
{
"encodings": [
{
"labels": [
"csiso2022kr",
"iso-2022-cn",
"iso-2022-cn-ext",
"iso-2022-kr"
],
"name": "replacement"
},
{
"labels": [
"utf-16be"
],
"name": "utf-16be"
},
{
"labels": [
"utf-16",
"utf-16le"
],
"name": "utf-16le"
},
{
"labels": [
"x-user-defined"
],
"name": "x-user-defined"
}
],
"heading": "Legacy miscellaneous encodings"
}
]
// This file is based on non-normative encodings.json resource referenced by
// http://encoding.spec.whatwg.org/ - a reference copy is saved in this
// directory and can be updated via:
// curl -O http://encoding.spec.whatwg.org/encodings.json
//
// Changes made to this file are:
// * whitespace
// * encodings.json data assigned to `encodings_table` (for tests)
// * UTF encodings listed in `utf_encodings` (for tests)
// * 'ibm866' not yet supported - crbug.com/277023
// * 'gb18030' distinct from 'gbk' - crbug.com/339862
// * 'big5-hkscs' distinct from 'big5' - crbug.com/277040
// * 'hz-gb-2312' is replacement label - w3.org/Bugs/Public/show_bug.cgi?id=25339
// Only these encodings are supported for encoding (vs. decoding)
utf_encodings = ["utf-8", "utf-16le", "utf-16be"];
var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];
// From non-normative encodings.json resource referenced by http://encoding.spec.whatwg.org/
encodings_table = [
var encodings_table = [
{
"encodings": [
{
......@@ -346,17 +359,12 @@ encodings_table = [
],
"name": "gbk"
},
// 'gb18030' separate from 'gbk': crbug.com/339862
{
"labels": [
"gb18030"
],
"name": "gb18030"
},
{
"labels": [
"hz-gb-2312"
],
"name": "hz-gb-2312"
}
],
"heading": "Legacy multi-byte Chinese (simplified) encodings"
......@@ -366,7 +374,7 @@ encodings_table = [
{
"labels": [
"big5",
"big5-hkscs",
// "big5-hkscs", see crbug.com/277040
"cn-big5",
"csbig5",
"x-x-big5"
......@@ -424,13 +432,6 @@ encodings_table = [
"windows-949"
],
"name": "euc-kr"
},
{
"labels": [
"csiso2022kr",
"iso-2022-kr"
],
"name": "iso-2022-kr"
}
],
"heading": "Legacy multi-byte Korean encodings"
......@@ -439,8 +440,12 @@ encodings_table = [
"encodings": [
{
"labels": [
"csiso2022kr",
// 'hz-gb-2312' added: w3.org/Bugs/Public/show_bug.cgi?id=25339
"hz-gb-2312",
"iso-2022-cn",
"iso-2022-cn-ext"
"iso-2022-cn-ext",
"iso-2022-kr"
],
"name": "replacement"
},
......
......@@ -51,13 +51,13 @@ TextDecoder* TextDecoder::create(const String& label, const Dictionary& options,
bool fatal = false;
options.get("fatal", fatal);
return new TextDecoder(encoding.name(), fatal);
return new TextDecoder(encoding, fatal);
}
TextDecoder::TextDecoder(const String& encoding, bool fatal)
TextDecoder::TextDecoder(const WTF::TextEncoding& encoding, bool fatal)
: m_encoding(encoding)
, m_codec(newTextCodec(m_encoding))
, m_codec(newTextCodec(encoding))
, m_fatal(fatal)
, m_bomSeen(false)
{
......
......@@ -56,7 +56,7 @@ public:
void trace(Visitor*) { }
private:
TextDecoder(const String& encoding, bool fatal);
TextDecoder(const WTF::TextEncoding&, bool fatal);
WTF::TextEncoding m_encoding;
OwnPtr<WTF::TextCodec> m_codec;
......
......@@ -55,12 +55,12 @@ TextEncoder* TextEncoder::create(const String& utfLabel, ExceptionState& excepti
return 0;
}
return new TextEncoder(encoding.name());
return new TextEncoder(encoding);
}
TextEncoder::TextEncoder(const String& encoding)
TextEncoder::TextEncoder(const WTF::TextEncoding& encoding)
: m_encoding(encoding)
, m_codec(newTextCodec(m_encoding))
, m_codec(newTextCodec(encoding))
{
}
......
......@@ -56,7 +56,7 @@ public:
void trace(Visitor*) { }
private:
TextEncoder(const String& encoding);
TextEncoder(const WTF::TextEncoding&);
WTF::TextEncoding m_encoding;
OwnPtr<WTF::TextCodec> m_codec;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment