Commit d7f04496 authored by rsleevi@chromium.org

Reduce footprint of registry controlled domain table

The perfect hash table containing all registry controlled domains is
replaced by a compact graph (a DAFSA, i.e. a deterministic acyclic finite
state automaton) to reduce the binary size and the PSS (proportional set
size) of the running process. The new structure is about 33 kB, compared
to 380 kB for the perfect hash table.

Patch by Olle Liljenzin <ollel@opera.com>, originally at
https://codereview.chromium.org/197183002/

BUG=370672
R=brettw@chromium.org

Review URL: https://codereview.chromium.org/270363003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@270039 0039d316-1c4b-4281-b951-d872f2087c98
parent 28e06318
...@@ -82,12 +82,13 @@ component("net") { ...@@ -82,12 +82,13 @@ component("net") {
include_dirs = [] include_dirs = []
deps = [ deps = [
":net_resources", ":net_resources",
"//base", "//base",
"//base:i18n", "//base:i18n",
"//base/third_party/dynamic_annotations", "//base/third_party/dynamic_annotations",
"//crypto", "//crypto",
"//crypto:platform", "//crypto:platform",
"//net/base/registry_controlled_domains:registry_controlled_domains",
"//sdch", "//sdch",
"//third_party/icu", "//third_party/icu",
"//third_party/zlib", "//third_party/zlib",
......
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Generates one "<name>-inc.cc" file per .gperf source listed below by running
# make_dafsa.py on it (action_foreach invokes the script once per source).
action_foreach("registry_controlled_domains") {
script = "//net/tools/tld_cleanup/make_dafsa.py"
# The main table plus the numbered unittest inputs.
sources = [
"effective_tld_names.gperf",
"effective_tld_names_unittest1.gperf",
"effective_tld_names_unittest2.gperf",
"effective_tld_names_unittest3.gperf",
"effective_tld_names_unittest4.gperf",
"effective_tld_names_unittest5.gperf",
"effective_tld_names_unittest6.gperf",
]
# {{source_name_part}} expands to the source file name without its extension,
# so each input gets its own output in this target's generated-files directory.
outputs = [
"${target_gen_dir}/{{source_name_part}}-inc.cc"
]
# Script arguments: the input file, then the output file. The output path is
# the same one declared in outputs, rebased relative to root_build_dir.
args = [
"{{source}}",
rebase_path("${target_gen_dir}/{{source_name_part}}-inc.cc", root_build_dir)
]
}
This source diff could not be displayed because it is too large. You can view the blob instead.
/* C++ code produced by gperf version 3.0.3 */
/* Command-line: gperf -a -L C++ -C -c -o -t -k '*' -NFindDomain -ZPerfect_Hash_Test1 -P -K name_offset -Q stringpool1 -D effective_tld_names_unittest1.gperf */
/* Compile-time guard: each character literal listed here must have its
   ISO-646 (ASCII) code. The generated hash function indexes asso_values[]
   by the raw value of each input character, so a different execution
   character set would make the tables wrong; in that case compilation is
   stopped by the #error below. */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
#line 1 "effective_tld_names_unittest1.gperf"
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Test file used by registry_controlled_domain_unittest.
// We edit this file manually, then run
// gperf -a -L "C++" -C -c -o -t -k '*' -NFindDomain -ZPerfect_Hash_Test1 -P -K name_offset -Q stringpool1 -D effective_tld_names_unittest1.gperf > effective_tld_names_unittest1.cc
// to generate the perfect hashmap.
#line 10 "effective_tld_names_unittest1.gperf"
// One entry of the generated rule table: where the rule's name lives in the
// string pool and which kind of rule it is.
struct DomainRule {
  int name_offset;  // Byte offset of the rule name from the start of the pool.
  int type;         // 1: exception, 2: wildcard, 4: private
};

#define TOTAL_KEYWORDS 11
#define MIN_WORD_LENGTH 1
#define MAX_WORD_LENGTH 11
#define MIN_HASH_VALUE 1
#define MAX_HASH_VALUE 17
/* maximum key range = 17, duplicates = 0 */

// Perfect-hash lookup over the keywords of
// effective_tld_names_unittest1.gperf.
class Perfect_Hash_Test1
{
private:
  static inline unsigned int hash (const char *str, unsigned int len);
public:
  static const struct DomainRule *FindDomain (const char *str, unsigned int len);
};

// Returns len plus the association value of every inspected byte of str.
//
// The switch falls through from the highest position down to str[0]. Any len
// without its own case label (0, or anything above 10) lands on `default`
// and reads str[10] .. str[0], so callers must keep len within
// [MIN_WORD_LENGTH, MAX_WORD_LENGTH]; FindDomain checks that before calling.
inline unsigned int
Perfect_Hash_Test1::hash (const char *str, unsigned int len)
{
  // Indexed by the unsigned value of a character. 18 (MAX_HASH_VALUE + 1) is
  // assigned to every character that occurs in no keyword, which pushes the
  // hash of such input out of the valid range.
  static const unsigned char asso_values[] =
    {
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 0, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 0, 0, 0,
      18, 5, 0, 18, 18, 0, 0, 18, 18, 0,
      5, 0, 0, 18, 0, 18, 5, 18, 0, 18,
      18, 18, 0, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
      18, 18, 18, 18, 18, 18
    };
  int hval = len;
  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[10]];
      /*FALLTHROUGH*/
      case 10:
        hval += asso_values[(unsigned char)str[9]];
      /*FALLTHROUGH*/
      case 9:
        hval += asso_values[(unsigned char)str[8]];
      /*FALLTHROUGH*/
      case 8:
        hval += asso_values[(unsigned char)str[7]];
      /*FALLTHROUGH*/
      case 7:
        hval += asso_values[(unsigned char)str[6]];
      /*FALLTHROUGH*/
      case 6:
        hval += asso_values[(unsigned char)str[5]];
      /*FALLTHROUGH*/
      case 5:
        hval += asso_values[(unsigned char)str[4]];
      /*FALLTHROUGH*/
      case 4:
        hval += asso_values[(unsigned char)str[3]];
      /*FALLTHROUGH*/
      case 3:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}

// Layout of the keyword string pool: one NUL-terminated char array per
// keyword, in the same order as the wordlist table in FindDomain.
struct stringpool1_t
  {
    char stringpool1_str0[sizeof("c")];
    char stringpool1_str1[sizeof("jp")];
    char stringpool1_str2[sizeof("b.c")];
    char stringpool1_str3[sizeof("ac.jp")];
    char stringpool1_str4[sizeof("bar.jp")];
    char stringpool1_str5[sizeof("no")];
    char stringpool1_str6[sizeof("baz.bar.jp")];
    char stringpool1_str7[sizeof("bar.baz.com")];
    char stringpool1_str8[sizeof("priv.no")];
    char stringpool1_str9[sizeof("pref.bar.jp")];
    char stringpool1_str10[sizeof("private")];
  };
static const struct stringpool1_t stringpool1_contents =
  {
    "c",
    "jp",
    "b.c",
    "ac.jp",
    "bar.jp",
    "no",
    "baz.bar.jp",
    "bar.baz.com",
    "priv.no",
    "pref.bar.jp",
    "private"
  };
// The pool viewed as a flat byte array; DomainRule::name_offset indexes it.
#define stringpool1 ((const char *) &stringpool1_contents)

// Looks up the first len bytes of str among the keywords.
//
// Returns a pointer to the matching entry of the static rule table, or 0 when
// len is outside [MIN_WORD_LENGTH, MAX_WORD_LENGTH] or the bytes are not a
// keyword. Only str[0 .. len-1] is examined, so str need not be
// NUL-terminated at len.
const struct DomainRule *
Perfect_Hash_Test1::FindDomain (const char *str, unsigned int len)
{
  // name_offset of each entry is the offset of the corresponding member
  // inside stringpool1_t, obtained by taking the member's address through a
  // null struct pointer.
  static const struct DomainRule wordlist[] =
    {
#line 21 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str0, 2},
#line 15 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str1, 0},
#line 22 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str2, 1},
#line 16 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str3, 0},
#line 17 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str4, 2},
#line 23 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str5, 0},
#line 18 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str6, 2},
#line 20 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str7, 0},
#line 24 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str8, 4},
#line 19 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str9, 1},
#line 25 "effective_tld_names_unittest1.gperf"
      {(int)(long)&((struct stringpool1_t *)0)->stringpool1_str10, 4}
    };
  // Maps a hash value to an index into wordlist; -1 marks hash values that
  // belong to no keyword.
  static const signed char lookup[] =
    {
      -1, 0, 1, 2, -1, 3, 4, 5, -1, -1, 6, 7, 8, -1,
      -1, -1, 9, 10
    };
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          int index = lookup[key];
          if (index >= 0)
            {
              const char *s = wordlist[index].name_offset + stringpool1;
              // The hash only selects a candidate: confirm it byte for byte,
              // and require the keyword to end exactly at len so that a
              // longer keyword sharing the prefix is not accepted.
              if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
                return &wordlist[index];
            }
        }
    }
  return 0;
}
#line 26 "effective_tld_names_unittest1.gperf"
/* C++ code produced by gperf version 3.0.3 */
/* Command-line: gperf -a -L C++ -C -c -o -t -k '*' -NFindDomain -ZPerfect_Hash_Test2 -P -K name_offset -Q stringpool2 -D -T effective_tld_names_unittest2.gperf */
/* Compile-time guard: each character literal listed here must have its
   ISO-646 (ASCII) code. The generated hash function indexes asso_values[]
   by the raw value of each input character, so a different execution
   character set would make the tables wrong; in that case compilation is
   stopped by the #error below. */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
#line 1 "effective_tld_names_unittest2.gperf"
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Test file used by registry_controlled_domain_unittest.
// We edit this file manually, then run
// gperf -a -L "C++" -C -c -o -t -k '*' -NFindDomain -ZPerfect_Hash_Test2 -P -K name_offset -Q stringpool2 -D -T effective_tld_names_unittest2.gperf > effective_tld_names_unittest2.cc
// to generate the perfect hashmap.
#define TOTAL_KEYWORDS 2
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 6
#define MIN_HASH_VALUE 2
#define MAX_HASH_VALUE 6
/* maximum key range = 5, duplicates = 0 */

// Perfect-hash lookup over the keywords of
// effective_tld_names_unittest2.gperf.
//
// struct DomainRule is not declared in this file (it was generated with
// gperf -T), so a complete definition with `name_offset` and `type` members
// must be visible before this point.
class Perfect_Hash_Test2
{
private:
  static inline unsigned int hash (const char *str, unsigned int len);
public:
  static const struct DomainRule *FindDomain (const char *str, unsigned int len);
};

// Returns len plus the association value of every inspected byte of str.
//
// The switch falls through from the highest position down to str[0]. Any len
// without its own case label (0, or anything above 5) lands on `default`
// and reads str[5] .. str[0], so callers must keep len within
// [MIN_WORD_LENGTH, MAX_WORD_LENGTH]; FindDomain checks that before calling.
inline unsigned int
Perfect_Hash_Test2::hash (const char *str, unsigned int len)
{
  // Indexed by the unsigned value of a character. 7 (MAX_HASH_VALUE + 1) is
  // assigned to every character that occurs in no keyword, which pushes the
  // hash of such input out of the valid range.
  static const unsigned char asso_values[] =
    {
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 0, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 0, 0, 7,
      7, 7, 7, 7, 7, 7, 0, 7, 7, 7,
      7, 7, 0, 7, 0, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7
    };
  int hval = len;
  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[5]];
      /*FALLTHROUGH*/
      case 5:
        hval += asso_values[(unsigned char)str[4]];
      /*FALLTHROUGH*/
      case 4:
        hval += asso_values[(unsigned char)str[3]];
      /*FALLTHROUGH*/
      case 3:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}

// Layout of the keyword string pool: one NUL-terminated char array per
// keyword, in the same order as the wordlist table in FindDomain.
struct stringpool2_t
  {
    char stringpool2_str0[sizeof("jp")];
    char stringpool2_str1[sizeof("bar.jp")];
  };
static const struct stringpool2_t stringpool2_contents =
  {
    "jp",
    "bar.jp"
  };
// The pool viewed as a flat byte array; DomainRule::name_offset indexes it.
#define stringpool2 ((const char *) &stringpool2_contents)

// Looks up the first len bytes of str among the keywords.
//
// Returns a pointer to the matching entry of the static rule table, or 0 when
// len is outside [MIN_WORD_LENGTH, MAX_WORD_LENGTH] or the bytes are not a
// keyword. Only str[0 .. len-1] is examined, so str need not be
// NUL-terminated at len.
const struct DomainRule *
Perfect_Hash_Test2::FindDomain (const char *str, unsigned int len)
{
  // name_offset of each entry is the offset of the corresponding member
  // inside stringpool2_t, obtained by taking the member's address through a
  // null struct pointer.
  static const struct DomainRule wordlist[] =
    {
#line 15 "effective_tld_names_unittest2.gperf"
      {(int)(long)&((struct stringpool2_t *)0)->stringpool2_str0, 0},
#line 16 "effective_tld_names_unittest2.gperf"
      {(int)(long)&((struct stringpool2_t *)0)->stringpool2_str1, 0}
    };
  // Maps a hash value to an index into wordlist; -1 marks hash values that
  // belong to no keyword.
  static const signed char lookup[] =
    {
      -1, -1, 0, -1, -1, -1, 1
    };
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          int index = lookup[key];
          if (index >= 0)
            {
              const char *s = wordlist[index].name_offset + stringpool2;
              // The hash only selects a candidate: confirm it byte for byte,
              // and require the keyword to end exactly at len so that a
              // longer keyword sharing the prefix is not accepted.
              if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
                return &wordlist[index];
            }
        }
    }
  return 0;
}
#line 17 "effective_tld_names_unittest2.gperf"
This file is for testing 2-byte offsets
%%
0____________________________________________________________________________________________________0, 0
1____________________________________________________________________________________________________1, 4
2____________________________________________________________________________________________________2, 0
3____________________________________________________________________________________________________3, 4
4____________________________________________________________________________________________________4, 0
5____________________________________________________________________________________________________5, 4
6____________________________________________________________________________________________________6, 0
7____________________________________________________________________________________________________7, 4
8____________________________________________________________________________________________________8, 0
9____________________________________________________________________________________________________9, 4
%%
This file is for testing 3-byte offsets
%%
a0____________________________________________________________________________________________________a0, 0
a1____________________________________________________________________________________________________a1, 4
a2____________________________________________________________________________________________________a2, 0
a3____________________________________________________________________________________________________a3, 4
a4____________________________________________________________________________________________________a4, 0
a5____________________________________________________________________________________________________a5, 4
a6____________________________________________________________________________________________________a6, 0
a7____________________________________________________________________________________________________a7, 4
a8____________________________________________________________________________________________________a8, 0
a9____________________________________________________________________________________________________a9, 4
b0____________________________________________________________________________________________________b0, 0
b1____________________________________________________________________________________________________b1, 4
b2____________________________________________________________________________________________________b2, 0
b3____________________________________________________________________________________________________b3, 4
b4____________________________________________________________________________________________________b4, 0
b5____________________________________________________________________________________________________b5, 4
b6____________________________________________________________________________________________________b6, 0
b7____________________________________________________________________________________________________b7, 4
b8____________________________________________________________________________________________________b8, 0
b9____________________________________________________________________________________________________b9, 4
c0____________________________________________________________________________________________________c0, 0
c1____________________________________________________________________________________________________c1, 4
c2____________________________________________________________________________________________________c2, 0
c3____________________________________________________________________________________________________c3, 4
c4____________________________________________________________________________________________________c4, 0
c5____________________________________________________________________________________________________c5, 4
c6____________________________________________________________________________________________________c6, 0
c7____________________________________________________________________________________________________c7, 4
c8____________________________________________________________________________________________________c8, 0
c9____________________________________________________________________________________________________c9, 4
d0____________________________________________________________________________________________________d0, 0
d1____________________________________________________________________________________________________d1, 4
d2____________________________________________________________________________________________________d2, 0
d3____________________________________________________________________________________________________d3, 4
d4____________________________________________________________________________________________________d4, 0
d5____________________________________________________________________________________________________d5, 4
d6____________________________________________________________________________________________________d6, 0
d7____________________________________________________________________________________________________d7, 4
d8____________________________________________________________________________________________________d8, 0
d9____________________________________________________________________________________________________d9, 4
e0____________________________________________________________________________________________________e0, 0
e1____________________________________________________________________________________________________e1, 4
e2____________________________________________________________________________________________________e2, 0
e3____________________________________________________________________________________________________e3, 4
e4____________________________________________________________________________________________________e4, 0
e5____________________________________________________________________________________________________e5, 4
e6____________________________________________________________________________________________________e6, 0
e7____________________________________________________________________________________________________e7, 4
e8____________________________________________________________________________________________________e8, 0
e9____________________________________________________________________________________________________e9, 4
f0____________________________________________________________________________________________________f0, 0
f1____________________________________________________________________________________________________f1, 4
f2____________________________________________________________________________________________________f2, 0
f3____________________________________________________________________________________________________f3, 4
f4____________________________________________________________________________________________________f4, 0
f5____________________________________________________________________________________________________f5, 4
f6____________________________________________________________________________________________________f6, 0
f7____________________________________________________________________________________________________f7, 4
f8____________________________________________________________________________________________________f8, 0
f9____________________________________________________________________________________________________f9, 4
g0____________________________________________________________________________________________________g0, 0
g1____________________________________________________________________________________________________g1, 4
g2____________________________________________________________________________________________________g2, 0
g3____________________________________________________________________________________________________g3, 4
g4____________________________________________________________________________________________________g4, 0
g5____________________________________________________________________________________________________g5, 4
g6____________________________________________________________________________________________________g6, 0
g7____________________________________________________________________________________________________g7, 4
g8____________________________________________________________________________________________________g8, 0
g9____________________________________________________________________________________________________g9, 4
h0____________________________________________________________________________________________________h0, 0
h1____________________________________________________________________________________________________h1, 4
h2____________________________________________________________________________________________________h2, 0
h3____________________________________________________________________________________________________h3, 4
h4____________________________________________________________________________________________________h4, 0
h5____________________________________________________________________________________________________h5, 4
h6____________________________________________________________________________________________________h6, 0
h7____________________________________________________________________________________________________h7, 4
h8____________________________________________________________________________________________________h8, 0
h9____________________________________________________________________________________________________h9, 4
i0____________________________________________________________________________________________________i0, 0
i1____________________________________________________________________________________________________i1, 4
i2____________________________________________________________________________________________________i2, 0
i3____________________________________________________________________________________________________i3, 4
i4____________________________________________________________________________________________________i4, 0
i5____________________________________________________________________________________________________i5, 4
i6____________________________________________________________________________________________________i6, 0
i7____________________________________________________________________________________________________i7, 4
i8____________________________________________________________________________________________________i8, 0
i9____________________________________________________________________________________________________i9, 4
j0____________________________________________________________________________________________________j0, 0
j1____________________________________________________________________________________________________j1, 4
j2____________________________________________________________________________________________________j2, 0
j3____________________________________________________________________________________________________j3, 4
j4____________________________________________________________________________________________________j4, 0
j5____________________________________________________________________________________________________j5, 4
j6____________________________________________________________________________________________________j6, 0
j7____________________________________________________________________________________________________j7, 4
j8____________________________________________________________________________________________________j8, 0
j9____________________________________________________________________________________________________j9, 4
k0____________________________________________________________________________________________________k0, 0
k1____________________________________________________________________________________________________k1, 4
k2____________________________________________________________________________________________________k2, 0
k3____________________________________________________________________________________________________k3, 4
k4____________________________________________________________________________________________________k4, 0
k5____________________________________________________________________________________________________k5, 4
k6____________________________________________________________________________________________________k6, 0
k7____________________________________________________________________________________________________k7, 4
k8____________________________________________________________________________________________________k8, 0
k9____________________________________________________________________________________________________k9, 4
l0____________________________________________________________________________________________________l0, 0
l1____________________________________________________________________________________________________l1, 4
l2____________________________________________________________________________________________________l2, 0
l3____________________________________________________________________________________________________l3, 4
l4____________________________________________________________________________________________________l4, 0
l5____________________________________________________________________________________________________l5, 4
l6____________________________________________________________________________________________________l6, 0
l7____________________________________________________________________________________________________l7, 4
l8____________________________________________________________________________________________________l8, 0
l9____________________________________________________________________________________________________l9, 4
m0____________________________________________________________________________________________________m0, 0
m1____________________________________________________________________________________________________m1, 4
m2____________________________________________________________________________________________________m2, 0
m3____________________________________________________________________________________________________m3, 4
m4____________________________________________________________________________________________________m4, 0
m5____________________________________________________________________________________________________m5, 4
m6____________________________________________________________________________________________________m6, 0
m7____________________________________________________________________________________________________m7, 4
m8____________________________________________________________________________________________________m8, 0
m9____________________________________________________________________________________________________m9, 4
n0____________________________________________________________________________________________________n0, 0
n1____________________________________________________________________________________________________n1, 4
n2____________________________________________________________________________________________________n2, 0
n3____________________________________________________________________________________________________n3, 4
n4____________________________________________________________________________________________________n4, 0
n5____________________________________________________________________________________________________n5, 4
n6____________________________________________________________________________________________________n6, 0
n7____________________________________________________________________________________________________n7, 4
n8____________________________________________________________________________________________________n8, 0
n9____________________________________________________________________________________________________n9, 4
o0____________________________________________________________________________________________________o0, 0
o1____________________________________________________________________________________________________o1, 4
o2____________________________________________________________________________________________________o2, 0
o3____________________________________________________________________________________________________o3, 4
o4____________________________________________________________________________________________________o4, 0
o5____________________________________________________________________________________________________o5, 4
o6____________________________________________________________________________________________________o6, 0
o7____________________________________________________________________________________________________o7, 4
o8____________________________________________________________________________________________________o8, 0
o9____________________________________________________________________________________________________o9, 4
p0____________________________________________________________________________________________________p0, 0
p1____________________________________________________________________________________________________p1, 4
p2____________________________________________________________________________________________________p2, 0
p3____________________________________________________________________________________________________p3, 4
p4____________________________________________________________________________________________________p4, 0
p5____________________________________________________________________________________________________p5, 4
p6____________________________________________________________________________________________________p6, 0
p7____________________________________________________________________________________________________p7, 4
p8____________________________________________________________________________________________________p8, 0
p9____________________________________________________________________________________________________p9, 4
q0____________________________________________________________________________________________________q0, 0
q1____________________________________________________________________________________________________q1, 4
q2____________________________________________________________________________________________________q2, 0
q3____________________________________________________________________________________________________q3, 4
q4____________________________________________________________________________________________________q4, 0
q5____________________________________________________________________________________________________q5, 4
q6____________________________________________________________________________________________________q6, 0
q7____________________________________________________________________________________________________q7, 4
q8____________________________________________________________________________________________________q8, 0
q9____________________________________________________________________________________________________q9, 4
r0____________________________________________________________________________________________________r0, 0
r1____________________________________________________________________________________________________r1, 4
r2____________________________________________________________________________________________________r2, 0
r3____________________________________________________________________________________________________r3, 4
r4____________________________________________________________________________________________________r4, 0
r5____________________________________________________________________________________________________r5, 4
r6____________________________________________________________________________________________________r6, 0
r7____________________________________________________________________________________________________r7, 4
r8____________________________________________________________________________________________________r8, 0
r9____________________________________________________________________________________________________r9, 4
s0____________________________________________________________________________________________________s0, 0
s1____________________________________________________________________________________________________s1, 4
s2____________________________________________________________________________________________________s2, 0
s3____________________________________________________________________________________________________s3, 4
s4____________________________________________________________________________________________________s4, 0
s5____________________________________________________________________________________________________s5, 4
s6____________________________________________________________________________________________________s6, 0
s7____________________________________________________________________________________________________s7, 4
s8____________________________________________________________________________________________________s8, 0
s9____________________________________________________________________________________________________s9, 4
t0____________________________________________________________________________________________________t0, 0
t1____________________________________________________________________________________________________t1, 4
t2____________________________________________________________________________________________________t2, 0
t3____________________________________________________________________________________________________t3, 4
t4____________________________________________________________________________________________________t4, 0
t5____________________________________________________________________________________________________t5, 4
t6____________________________________________________________________________________________________t6, 0
t7____________________________________________________________________________________________________t7, 4
t8____________________________________________________________________________________________________t8, 0
t9____________________________________________________________________________________________________t9, 4
u0____________________________________________________________________________________________________u0, 0
u1____________________________________________________________________________________________________u1, 4
u2____________________________________________________________________________________________________u2, 0
u3____________________________________________________________________________________________________u3, 4
u4____________________________________________________________________________________________________u4, 0
u5____________________________________________________________________________________________________u5, 4
u6____________________________________________________________________________________________________u6, 0
u7____________________________________________________________________________________________________u7, 4
u8____________________________________________________________________________________________________u8, 0
u9____________________________________________________________________________________________________u9, 4
v0____________________________________________________________________________________________________v0, 0
v1____________________________________________________________________________________________________v1, 4
v2____________________________________________________________________________________________________v2, 0
v3____________________________________________________________________________________________________v3, 4
v4____________________________________________________________________________________________________v4, 0
v5____________________________________________________________________________________________________v5, 4
v6____________________________________________________________________________________________________v6, 0
v7____________________________________________________________________________________________________v7, 4
v8____________________________________________________________________________________________________v8, 0
v9____________________________________________________________________________________________________v9, 4
w0____________________________________________________________________________________________________w0, 0
w1____________________________________________________________________________________________________w1, 4
w2____________________________________________________________________________________________________w2, 0
w3____________________________________________________________________________________________________w3, 4
w4____________________________________________________________________________________________________w4, 0
w5____________________________________________________________________________________________________w5, 4
w6____________________________________________________________________________________________________w6, 0
w7____________________________________________________________________________________________________w7, 4
w8____________________________________________________________________________________________________w8, 0
w9____________________________________________________________________________________________________w9, 4
x0____________________________________________________________________________________________________x0, 0
x1____________________________________________________________________________________________________x1, 4
x2____________________________________________________________________________________________________x2, 0
x3____________________________________________________________________________________________________x3, 4
x4____________________________________________________________________________________________________x4, 0
x5____________________________________________________________________________________________________x5, 4
x6____________________________________________________________________________________________________x6, 0
x7____________________________________________________________________________________________________x7, 4
x8____________________________________________________________________________________________________x8, 0
x9____________________________________________________________________________________________________x9, 4
y0____________________________________________________________________________________________________y0, 0
y1____________________________________________________________________________________________________y1, 4
y2____________________________________________________________________________________________________y2, 0
y3____________________________________________________________________________________________________y3, 4
y4____________________________________________________________________________________________________y4, 0
y5____________________________________________________________________________________________________y5, 4
y6____________________________________________________________________________________________________y6, 0
y7____________________________________________________________________________________________________y7, 4
y8____________________________________________________________________________________________________y8, 0
y9____________________________________________________________________________________________________y9, 4
z0____________________________________________________________________________________________________z0, 0
z1____________________________________________________________________________________________________z1, 4
z2____________________________________________________________________________________________________z2, 0
z3____________________________________________________________________________________________________z3, 4
z4____________________________________________________________________________________________________z4, 0
z5____________________________________________________________________________________________________z5, 4
z6____________________________________________________________________________________________________z6, 0
z7____________________________________________________________________________________________________z7, 4
z8____________________________________________________________________________________________________z8, 0
z9____________________________________________________________________________________________________z9, 4
A0____________________________________________________________________________________________________A0, 0
A1____________________________________________________________________________________________________A1, 4
A2____________________________________________________________________________________________________A2, 0
A3____________________________________________________________________________________________________A3, 4
A4____________________________________________________________________________________________________A4, 0
A5____________________________________________________________________________________________________A5, 4
A6____________________________________________________________________________________________________A6, 0
A7____________________________________________________________________________________________________A7, 4
A8____________________________________________________________________________________________________A8, 0
A9____________________________________________________________________________________________________A9, 4
B0____________________________________________________________________________________________________B0, 0
B1____________________________________________________________________________________________________B1, 4
B2____________________________________________________________________________________________________B2, 0
B3____________________________________________________________________________________________________B3, 4
B4____________________________________________________________________________________________________B4, 0
B5____________________________________________________________________________________________________B5, 4
B6____________________________________________________________________________________________________B6, 0
B7____________________________________________________________________________________________________B7, 4
B8____________________________________________________________________________________________________B8, 0
B9____________________________________________________________________________________________________B9, 4
C0____________________________________________________________________________________________________C0, 0
C1____________________________________________________________________________________________________C1, 4
C2____________________________________________________________________________________________________C2, 0
C3____________________________________________________________________________________________________C3, 4
C4____________________________________________________________________________________________________C4, 0
C5____________________________________________________________________________________________________C5, 4
C6____________________________________________________________________________________________________C6, 0
C7____________________________________________________________________________________________________C7, 4
C8____________________________________________________________________________________________________C8, 0
C9____________________________________________________________________________________________________C9, 4
D0____________________________________________________________________________________________________D0, 0
D1____________________________________________________________________________________________________D1, 4
D2____________________________________________________________________________________________________D2, 0
D3____________________________________________________________________________________________________D3, 4
D4____________________________________________________________________________________________________D4, 0
D5____________________________________________________________________________________________________D5, 4
D6____________________________________________________________________________________________________D6, 0
D7____________________________________________________________________________________________________D7, 4
D8____________________________________________________________________________________________________D8, 0
D9____________________________________________________________________________________________________D9, 4
E0____________________________________________________________________________________________________E0, 0
E1____________________________________________________________________________________________________E1, 4
E2____________________________________________________________________________________________________E2, 0
E3____________________________________________________________________________________________________E3, 4
E4____________________________________________________________________________________________________E4, 0
E5____________________________________________________________________________________________________E5, 4
E6____________________________________________________________________________________________________E6, 0
E7____________________________________________________________________________________________________E7, 4
E8____________________________________________________________________________________________________E8, 0
E9____________________________________________________________________________________________________E9, 4
F0____________________________________________________________________________________________________F0, 0
F1____________________________________________________________________________________________________F1, 4
F2____________________________________________________________________________________________________F2, 0
F3____________________________________________________________________________________________________F3, 4
F4____________________________________________________________________________________________________F4, 0
F5____________________________________________________________________________________________________F5, 4
F6____________________________________________________________________________________________________F6, 0
F7____________________________________________________________________________________________________F7, 4
F8____________________________________________________________________________________________________F8, 0
F9____________________________________________________________________________________________________F9, 4
G0____________________________________________________________________________________________________G0, 0
G1____________________________________________________________________________________________________G1, 4
G2____________________________________________________________________________________________________G2, 0
G3____________________________________________________________________________________________________G3, 4
G4____________________________________________________________________________________________________G4, 0
G5____________________________________________________________________________________________________G5, 4
G6____________________________________________________________________________________________________G6, 0
G7____________________________________________________________________________________________________G7, 4
G8____________________________________________________________________________________________________G8, 0
G9____________________________________________________________________________________________________G9, 4
H0____________________________________________________________________________________________________H0, 0
H1____________________________________________________________________________________________________H1, 4
H2____________________________________________________________________________________________________H2, 0
H3____________________________________________________________________________________________________H3, 4
H4____________________________________________________________________________________________________H4, 0
H5____________________________________________________________________________________________________H5, 4
H6____________________________________________________________________________________________________H6, 0
H7____________________________________________________________________________________________________H7, 4
H8____________________________________________________________________________________________________H8, 0
H9____________________________________________________________________________________________________H9, 4
I0____________________________________________________________________________________________________I0, 0
I1____________________________________________________________________________________________________I1, 4
I2____________________________________________________________________________________________________I2, 0
I3____________________________________________________________________________________________________I3, 4
I4____________________________________________________________________________________________________I4, 0
I5____________________________________________________________________________________________________I5, 4
I6____________________________________________________________________________________________________I6, 0
I7____________________________________________________________________________________________________I7, 4
I8____________________________________________________________________________________________________I8, 0
I9____________________________________________________________________________________________________I9, 4
J0____________________________________________________________________________________________________J0, 0
J1____________________________________________________________________________________________________J1, 4
J2____________________________________________________________________________________________________J2, 0
J3____________________________________________________________________________________________________J3, 4
J4____________________________________________________________________________________________________J4, 0
J5____________________________________________________________________________________________________J5, 4
J6____________________________________________________________________________________________________J6, 0
J7____________________________________________________________________________________________________J7, 4
J8____________________________________________________________________________________________________J8, 0
J9____________________________________________________________________________________________________J9, 4
K0____________________________________________________________________________________________________K0, 0
K1____________________________________________________________________________________________________K1, 4
K2____________________________________________________________________________________________________K2, 0
K3____________________________________________________________________________________________________K3, 4
K4____________________________________________________________________________________________________K4, 0
K5____________________________________________________________________________________________________K5, 4
K6____________________________________________________________________________________________________K6, 0
K7____________________________________________________________________________________________________K7, 4
K8____________________________________________________________________________________________________K8, 0
K9____________________________________________________________________________________________________K9, 4
L0____________________________________________________________________________________________________L0, 0
L1____________________________________________________________________________________________________L1, 4
L2____________________________________________________________________________________________________L2, 0
L3____________________________________________________________________________________________________L3, 4
L4____________________________________________________________________________________________________L4, 0
L5____________________________________________________________________________________________________L5, 4
L6____________________________________________________________________________________________________L6, 0
L7____________________________________________________________________________________________________L7, 4
L8____________________________________________________________________________________________________L8, 0
L9____________________________________________________________________________________________________L9, 4
M0____________________________________________________________________________________________________M0, 0
M1____________________________________________________________________________________________________M1, 4
M2____________________________________________________________________________________________________M2, 0
M3____________________________________________________________________________________________________M3, 4
M4____________________________________________________________________________________________________M4, 0
M5____________________________________________________________________________________________________M5, 4
M6____________________________________________________________________________________________________M6, 0
M7____________________________________________________________________________________________________M7, 4
M8____________________________________________________________________________________________________M8, 0
M9____________________________________________________________________________________________________M9, 4
N0____________________________________________________________________________________________________N0, 0
N1____________________________________________________________________________________________________N1, 4
N2____________________________________________________________________________________________________N2, 0
N3____________________________________________________________________________________________________N3, 4
N4____________________________________________________________________________________________________N4, 0
N5____________________________________________________________________________________________________N5, 4
N6____________________________________________________________________________________________________N6, 0
N7____________________________________________________________________________________________________N7, 4
N8____________________________________________________________________________________________________N8, 0
N9____________________________________________________________________________________________________N9, 4
O0____________________________________________________________________________________________________O0, 0
O1____________________________________________________________________________________________________O1, 4
O2____________________________________________________________________________________________________O2, 0
O3____________________________________________________________________________________________________O3, 4
O4____________________________________________________________________________________________________O4, 0
O5____________________________________________________________________________________________________O5, 4
O6____________________________________________________________________________________________________O6, 0
O7____________________________________________________________________________________________________O7, 4
O8____________________________________________________________________________________________________O8, 0
O9____________________________________________________________________________________________________O9, 4
P0____________________________________________________________________________________________________P0, 0
P1____________________________________________________________________________________________________P1, 4
P2____________________________________________________________________________________________________P2, 0
P3____________________________________________________________________________________________________P3, 4
P4____________________________________________________________________________________________________P4, 0
P5____________________________________________________________________________________________________P5, 4
P6____________________________________________________________________________________________________P6, 0
P7____________________________________________________________________________________________________P7, 4
P8____________________________________________________________________________________________________P8, 0
P9____________________________________________________________________________________________________P9, 4
Q0____________________________________________________________________________________________________Q0, 0
Q1____________________________________________________________________________________________________Q1, 4
Q2____________________________________________________________________________________________________Q2, 0
Q3____________________________________________________________________________________________________Q3, 4
Q4____________________________________________________________________________________________________Q4, 0
Q5____________________________________________________________________________________________________Q5, 4
Q6____________________________________________________________________________________________________Q6, 0
Q7____________________________________________________________________________________________________Q7, 4
Q8____________________________________________________________________________________________________Q8, 0
Q9____________________________________________________________________________________________________Q9, 4
R0____________________________________________________________________________________________________R0, 0
R1____________________________________________________________________________________________________R1, 4
R2____________________________________________________________________________________________________R2, 0
R3____________________________________________________________________________________________________R3, 4
R4____________________________________________________________________________________________________R4, 0
R5____________________________________________________________________________________________________R5, 4
R6____________________________________________________________________________________________________R6, 0
R7____________________________________________________________________________________________________R7, 4
R8____________________________________________________________________________________________________R8, 0
R9____________________________________________________________________________________________________R9, 4
S0____________________________________________________________________________________________________S0, 0
S1____________________________________________________________________________________________________S1, 4
S2____________________________________________________________________________________________________S2, 0
S3____________________________________________________________________________________________________S3, 4
S4____________________________________________________________________________________________________S4, 0
S5____________________________________________________________________________________________________S5, 4
S6____________________________________________________________________________________________________S6, 0
S7____________________________________________________________________________________________________S7, 4
S8____________________________________________________________________________________________________S8, 0
S9____________________________________________________________________________________________________S9, 4
T0____________________________________________________________________________________________________T0, 0
T1____________________________________________________________________________________________________T1, 4
T2____________________________________________________________________________________________________T2, 0
T3____________________________________________________________________________________________________T3, 4
T4____________________________________________________________________________________________________T4, 0
T5____________________________________________________________________________________________________T5, 4
T6____________________________________________________________________________________________________T6, 0
T7____________________________________________________________________________________________________T7, 4
T8____________________________________________________________________________________________________T8, 0
T9____________________________________________________________________________________________________T9, 4
U0____________________________________________________________________________________________________U0, 0
U1____________________________________________________________________________________________________U1, 4
U2____________________________________________________________________________________________________U2, 0
U3____________________________________________________________________________________________________U3, 4
U4____________________________________________________________________________________________________U4, 0
U5____________________________________________________________________________________________________U5, 4
U6____________________________________________________________________________________________________U6, 0
U7____________________________________________________________________________________________________U7, 4
U8____________________________________________________________________________________________________U8, 0
U9____________________________________________________________________________________________________U9, 4
V0____________________________________________________________________________________________________V0, 0
V1____________________________________________________________________________________________________V1, 4
V2____________________________________________________________________________________________________V2, 0
V3____________________________________________________________________________________________________V3, 4
V4____________________________________________________________________________________________________V4, 0
V5____________________________________________________________________________________________________V5, 4
V6____________________________________________________________________________________________________V6, 0
V7____________________________________________________________________________________________________V7, 4
V8____________________________________________________________________________________________________V8, 0
V9____________________________________________________________________________________________________V9, 4
W0____________________________________________________________________________________________________W0, 0
W1____________________________________________________________________________________________________W1, 4
W2____________________________________________________________________________________________________W2, 0
W3____________________________________________________________________________________________________W3, 4
W4____________________________________________________________________________________________________W4, 0
W5____________________________________________________________________________________________________W5, 4
W6____________________________________________________________________________________________________W6, 0
W7____________________________________________________________________________________________________W7, 4
W8____________________________________________________________________________________________________W8, 0
W9____________________________________________________________________________________________________W9, 4
X0____________________________________________________________________________________________________X0, 0
X1____________________________________________________________________________________________________X1, 4
X2____________________________________________________________________________________________________X2, 0
X3____________________________________________________________________________________________________X3, 4
X4____________________________________________________________________________________________________X4, 0
X5____________________________________________________________________________________________________X5, 4
X6____________________________________________________________________________________________________X6, 0
X7____________________________________________________________________________________________________X7, 4
X8____________________________________________________________________________________________________X8, 0
X9____________________________________________________________________________________________________X9, 4
Y0____________________________________________________________________________________________________Y0, 0
Y1____________________________________________________________________________________________________Y1, 4
Y2____________________________________________________________________________________________________Y2, 0
Y3____________________________________________________________________________________________________Y3, 4
Y4____________________________________________________________________________________________________Y4, 0
Y5____________________________________________________________________________________________________Y5, 4
Y6____________________________________________________________________________________________________Y6, 0
Y7____________________________________________________________________________________________________Y7, 4
Y8____________________________________________________________________________________________________Y8, 0
Y9____________________________________________________________________________________________________Y9, 4
Z0____________________________________________________________________________________________________Z0, 0
Z1____________________________________________________________________________________________________Z1, 4
Z2____________________________________________________________________________________________________Z2, 0
Z3____________________________________________________________________________________________________Z3, 4
Z4____________________________________________________________________________________________________Z4, 0
Z5____________________________________________________________________________________________________Z5, 4
Z6____________________________________________________________________________________________________Z6, 0
Z7____________________________________________________________________________________________________Z7, 4
Z8____________________________________________________________________________________________________Z8, 0
Z9____________________________________________________________________________________________________Z9, 4
%%
This file is for testing joined prefixes
%%
ai, 0
bj, 4
aak, 0
bbl, 4
aaaam, 0
bbbbn, 0
%%
This file is for testing joined suffixes
%%
ia, 0
jb, 4
kaa, 0
lbb, 4
maaaa, 0
nbbbb, 0
%%
...@@ -53,26 +53,151 @@ ...@@ -53,26 +53,151 @@
#include "url/gurl.h" #include "url/gurl.h"
#include "url/url_parse.h" #include "url/url_parse.h"
#include "effective_tld_names.cc"
namespace net { namespace net {
namespace registry_controlled_domains { namespace registry_controlled_domains {
namespace { namespace {
#include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
// See make_dafsa.py for documentation of the generated dafsa byte array.
const unsigned char* g_graph = kDafsa;
size_t g_graph_length = sizeof(kDafsa);
const int kNotFound = -1;
const int kExceptionRule = 1; const int kExceptionRule = 1;
const int kWildcardRule = 2; const int kWildcardRule = 2;
const int kPrivateRule = 4; const int kPrivateRule = 4;
const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain; // Read next offset from pos.
// Returns true if an offset could be read, false otherwise.
bool GetNextOffset(const unsigned char** pos, const unsigned char* end,
                   const unsigned char** offset) {
  // |*pos| points at the next offset entry of the current node, |*offset| is
  // the running destination address that each decoded entry is added to.
  // |*pos| == |end| means there are no more entries to read.
  const unsigned char* const cursor = *pos;
  if (cursor == end)
    return false;

  // An offset entry is always followed by a node to skip over and by a
  // destination node. Every object occupies at least one byte, so at least
  // three bytes must remain in the array.
  CHECK_LT(*pos + 2, end);

  const unsigned char lead = cursor[0];
  const unsigned char width_bits = lead & 0x60;
  size_t entry_size = 1;
  if (width_bits == 0x60) {
    // Three byte entry: 5 bits from the lead byte plus two full bytes.
    *offset += ((lead & 0x1F) << 16) | (cursor[1] << 8) | cursor[2];
    entry_size = 3;
  } else if (width_bits == 0x40) {
    // Two byte entry: 5 bits from the lead byte plus one full byte.
    *offset += ((lead & 0x1F) << 8) | cursor[1];
    entry_size = 2;
  } else {
    // One byte entry: the low 6 bits of the lead byte.
    *offset += lead & 0x3F;
  }

  // A set high bit marks the final entry; park |*pos| at |end| so that the
  // next call reports that nothing is left. Otherwise step past this entry.
  *pos = ((lead & 0x80) != 0) ? end : cursor + entry_size;
  return true;
}
// Tells whether the byte at |offset| is the last one in its label, which is
// signalled by the high bit (0x80) being set. |offset| must lie before |end|.
bool IsEOL(const unsigned char* offset, const unsigned char* end) {
  CHECK_LT(offset, end);
  const unsigned char end_marker = *offset & 0x80;
  return end_marker != 0;
}
// Compares the byte at |offset| with the first character of |key|.
// Use this for characters that are not the last one in a label; the stored
// byte is compared as-is. |offset| must lie before |end|.
bool IsMatch(const unsigned char* offset, const unsigned char* end,
             const char* key) {
  CHECK_LT(offset, end);
  const int node_char = *offset;
  const int key_char = *key;
  return node_char == key_char;
}
// Compares the byte at |offset| with the first character of |key|.
// Use this for the last character in a label: the stored byte is expected to
// be the key character with the 0x80 bit added. |offset| must lie before
// |end|.
bool IsEndCharMatch(const unsigned char* offset, const unsigned char* end,
                    const char* key) {
  CHECK_LT(offset, end);
  const int expected_byte = *key | 0x80;
  return *offset == expected_byte;
}
// 'stringpool' is defined as a macro by the gperf-generated // Read return value at offset.
// "effective_tld_names.cc". Provide a real constant value for it instead. // Returns true if a return value could be read, false otherwise.
const char* const kDefaultStringPool = stringpool; bool GetReturnValue(const unsigned char* offset, const unsigned char* end,
#undef stringpool int* return_value) {
CHECK_LT(offset, end);
if ((*offset & 0xE0) == 0x80) {
*return_value = *offset & 0x0F;
return true;
}
return false;
}
FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction; // Lookup a domain key in a byte array generated by make_dafsa.py.
const char* g_stringpool = kDefaultStringPool; // The rule type is returned if key is found, otherwise kNotFound is returned.
// Walks the byte array |graph| (|length| bytes) looking for the string that
// starts at |key| and is |key_length| characters long; the key is delimited
// by its length, not by a terminator. Returns the value decoded by
// GetReturnValue() when the whole key leads to a return value node, and
// kNotFound otherwise.
int LookupString(const unsigned char* graph, size_t length, const char* key,
size_t key_length) {
// |pos| iterates over the offset entries of the node being expanded, while
// |offset| accumulates those entries into the address of the child that is
// currently being tested.
const unsigned char* pos = graph;
const unsigned char* end = graph + length;
const unsigned char* offset = pos;
const char* key_end = key + key_length;
while (GetNextOffset(&pos, end, &offset)) {
// char <char>+ end_char offsets
// char <char>+ return value
// char end_char offsets
// char return value
// end_char offsets
// return_value
// Records whether this child already matched at least one key character.
bool did_consume = false;
if (key != key_end && !IsEOL(offset, end)) {
// Leading <char> is not a match. Don't dive into this child
if (!IsMatch(offset, end, key))
continue;
did_consume = true;
++offset;
++key;
// Possible matches at this point:
// <char>+ end_char offsets
// <char>+ return value
// end_char offsets
// return value
// Remove all remaining <char> nodes possible
while (!IsEOL(offset, end) && key != key_end) {
if (!IsMatch(offset, end, key))
return kNotFound;
++key;
++offset;
}
}
// Possible matches at this point:
// end_char offsets
// return_value
// If one or more <char> elements were consumed, a failure
// to match is terminal. Otherwise, try the next node.
if (key == key_end) {
// The key is used up, so only a return value node can complete the match.
int return_value;
if (GetReturnValue(offset, end, &return_value))
return return_value;
// The DAFSA guarantees that if the first char is a match, all
// remaining char elements MUST match if the key is truly present.
if (did_consume)
return kNotFound;
continue;
}
if (!IsEndCharMatch(offset, end, key)) {
if (did_consume)
return kNotFound; // Unexpected
continue;
}
++key;
// The matched child's own offset entries start right after its end_char,
// and they are accumulated onto |offset| from that same position.
pos = ++offset; // Dive into child
}
return kNotFound; // No match
}
size_t GetRegistryLengthImpl( size_t GetRegistryLengthImpl(
const std::string& host, const std::string& host,
...@@ -105,46 +230,40 @@ size_t GetRegistryLengthImpl( ...@@ -105,46 +230,40 @@ size_t GetRegistryLengthImpl(
return 0; // This can't have a registry + domain. return 0; // This can't have a registry + domain.
while (1) { while (1) {
const char* domain_str = host.data() + curr_start; const char* domain_str = host.data() + curr_start;
int domain_length = host_check_len - curr_start; size_t domain_length = host_check_len - curr_start;
const DomainRule* rule = g_find_domain_function(domain_str, domain_length); int type = LookupString(g_graph, g_graph_length, domain_str, domain_length);
bool do_check =
// We need to compare the string after finding a match because the type != kNotFound && (!(type & kPrivateRule) ||
// no-collisions of perfect hashing only refers to items in the set. Since private_filter == INCLUDE_PRIVATE_REGISTRIES);
// we're searching for arbitrary domains, there could be collisions.
// Furthermore, if the apparent match is a private registry and we're not // If the apparent match is a private registry and we're not including
// including those, it can't be an actual match. // those, it can't be an actual match.
if (rule) { if (do_check) {
bool do_check = !(rule->type & kPrivateRule) || // Exception rules override wildcard rules when the domain is an exact
private_filter == INCLUDE_PRIVATE_REGISTRIES; // match, but wildcards take precedence when there's a subdomain.
if (do_check && base::strncasecmp(domain_str, if (type & kWildcardRule && (prev_start != std::string::npos)) {
g_stringpool + rule->name_offset, // If prev_start == host_check_begin, then the host is the registry
domain_length) == 0) { // itself, so return 0.
// Exception rules override wildcard rules when the domain is an exact return (prev_start == host_check_begin) ? 0
// match, but wildcards take precedence when there's a subdomain. : (host.length() - prev_start);
if (rule->type & kWildcardRule && (prev_start != std::string::npos)) { }
// If prev_start == host_check_begin, then the host is the registry
// itself, so return 0.
return (prev_start == host_check_begin) ?
0 : (host.length() - prev_start);
}
if (rule->type & kExceptionRule) { if (type & kExceptionRule) {
if (next_dot == std::string::npos) { if (next_dot == std::string::npos) {
// If we get here, we had an exception rule with no dots (e.g. // If we get here, we had an exception rule with no dots (e.g.
// "!foo"). This would only be valid if we had a corresponding // "!foo"). This would only be valid if we had a corresponding
// wildcard rule, which would have to be "*". But we explicitly // wildcard rule, which would have to be "*". But we explicitly
// disallow that case, so this kind of rule is invalid. // disallow that case, so this kind of rule is invalid.
NOTREACHED() << "Invalid exception rule"; NOTREACHED() << "Invalid exception rule";
return 0; return 0;
}
return host.length() - next_dot - 1;
} }
return host.length() - next_dot - 1;
// If curr_start == host_check_begin, then the host is the registry
// itself, so return 0.
return (curr_start == host_check_begin) ?
0 : (host.length() - curr_start);
} }
// If curr_start == host_check_begin, then the host is the registry
// itself, so return 0.
return (curr_start == host_check_begin) ? 0
: (host.length() - curr_start);
} }
if (next_dot >= host_check_len) // Catches std::string::npos as well. if (next_dot >= host_check_len) // Catches std::string::npos as well.
...@@ -260,10 +379,16 @@ size_t GetRegistryLength( ...@@ -260,10 +379,16 @@ size_t GetRegistryLength(
return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
} }
void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function, void SetFindDomainGraph() {
const char* stringpool) { g_graph = kDafsa;
g_find_domain_function = function ? function : kDefaultFindDomainFunction; g_graph_length = sizeof(kDafsa);
g_stringpool = stringpool ? stringpool : kDefaultStringPool; }
// Makes subsequent lookups use the caller-supplied graph instead of the
// current one. |domains| must be non-null and |length| non-zero; both are
// CHECKed. Only the pointer and the length are stored, so presumably the
// array has to stay alive while it is installed -- confirm against callers.
void SetFindDomainGraph(const unsigned char* domains, size_t length) {
CHECK(domains);
CHECK_NE(length, 0u);
g_graph = domains;
g_graph_length = length;
} }
} // namespace registry_controlled_domains } // namespace registry_controlled_domains
......
...@@ -226,11 +226,12 @@ NET_EXPORT size_t GetRegistryLength(const std::string& host, ...@@ -226,11 +226,12 @@ NET_EXPORT size_t GetRegistryLength(const std::string& host,
typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int); typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int);
// Used for unit tests, so that a different perfect hash map from the full // Used for unit tests. Use default domains.
// list is used. Set to NULL to use the Default function. NET_EXPORT_PRIVATE void SetFindDomainGraph();
NET_EXPORT_PRIVATE void SetFindDomainFunctionAndStringPoolForTesting(
FindDomainPtr fn, const char* stringpool);
// Used for unit tests, so that a frozen list of domains is used.
NET_EXPORT_PRIVATE void SetFindDomainGraph(const unsigned char* domains,
size_t length);
} // namespace registry_controlled_domains } // namespace registry_controlled_domains
} // namespace net } // namespace net
......
...@@ -6,15 +6,26 @@ ...@@ -6,15 +6,26 @@
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "effective_tld_names_unittest1.cc" namespace {
static const char* const Perfect_Hash_Test1_stringpool = stringpool1; namespace test1 {
#undef TOTAL_KEYWORDS #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
#undef MIN_WORD_LENGTH }
#undef MAX_WORD_LENGTH namespace test2 {
#undef MIN_HASH_VALUE #include "net/base/registry_controlled_domains/effective_tld_names_unittest2-inc.cc"
#undef MAX_HASH_VALUE }
#include "effective_tld_names_unittest2.cc" namespace test3 {
static const char* const Perfect_Hash_Test2_stringpool = stringpool2; #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
}
namespace test4 {
#include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
}
namespace test5 {
#include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
}
namespace test6 {
#include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
}
} // namespace
namespace net { namespace net {
namespace registry_controlled_domains { namespace registry_controlled_domains {
...@@ -50,6 +61,12 @@ size_t GetRegistryLengthFromHost( ...@@ -50,6 +61,12 @@ size_t GetRegistryLengthFromHost(
return GetRegistryLength(host, unknown_filter, EXCLUDE_PRIVATE_REGISTRIES); return GetRegistryLength(host, unknown_filter, EXCLUDE_PRIVATE_REGISTRIES);
} }
size_t GetRegistryLengthFromHostIncludingPrivate(
const std::string& host,
UnknownRegistryFilter unknown_filter) {
return GetRegistryLength(host, unknown_filter, INCLUDE_PRIVATE_REGISTRIES);
}
bool CompareDomains(const std::string& url1, const std::string& url2) { bool CompareDomains(const std::string& url1, const std::string& url2) {
GURL g1 = GURL(url1); GURL g1 = GURL(url1);
GURL g2 = GURL(url2); GURL g2 = GURL(url2);
...@@ -60,17 +77,16 @@ bool CompareDomains(const std::string& url1, const std::string& url2) { ...@@ -60,17 +77,16 @@ bool CompareDomains(const std::string& url1, const std::string& url2) {
class RegistryControlledDomainTest : public testing::Test { class RegistryControlledDomainTest : public testing::Test {
protected: protected:
void UseDomainData(FindDomainPtr function, const char* const stringpool) { template <typename Graph>
SetFindDomainFunctionAndStringPoolForTesting(function, stringpool); void UseDomainData(const Graph& graph) {
SetFindDomainGraph(graph, sizeof(Graph));
} }
virtual void TearDown() { virtual void TearDown() { SetFindDomainGraph(); }
SetFindDomainFunctionAndStringPoolForTesting(NULL, NULL);
}
}; };
TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) { TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) {
UseDomainData(Perfect_Hash_Test1::FindDomain, Perfect_Hash_Test1_stringpool); UseDomainData(test1::kDafsa);
// Test GURL version of GetDomainAndRegistry(). // Test GURL version of GetDomainAndRegistry().
EXPECT_EQ("baz.jp", GetDomainFromURL("http://a.baz.jp/file.html")); // 1 EXPECT_EQ("baz.jp", GetDomainFromURL("http://a.baz.jp/file.html")); // 1
...@@ -129,7 +145,7 @@ TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) { ...@@ -129,7 +145,7 @@ TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) {
} }
TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) { TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) {
UseDomainData(Perfect_Hash_Test1::FindDomain, Perfect_Hash_Test1_stringpool); UseDomainData(test1::kDafsa);
// Test GURL version of GetRegistryLength(). // Test GURL version of GetRegistryLength().
EXPECT_EQ(2U, GetRegistryLengthFromURL("http://a.baz.jp/file.html", EXPECT_EQ(2U, GetRegistryLengthFromURL("http://a.baz.jp/file.html",
...@@ -248,7 +264,7 @@ TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) { ...@@ -248,7 +264,7 @@ TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) {
} }
TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) { TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) {
UseDomainData(Perfect_Hash_Test2::FindDomain, Perfect_Hash_Test2_stringpool); UseDomainData(test2::kDafsa);
EXPECT_TRUE(CompareDomains("http://a.b.bar.jp/file.html", EXPECT_TRUE(CompareDomains("http://a.b.bar.jp/file.html",
"http://a.b.bar.jp/file.html")); // b.bar.jp "http://a.b.bar.jp/file.html")); // b.bar.jp
...@@ -295,7 +311,7 @@ TEST_F(RegistryControlledDomainTest, TestDefaultData) { ...@@ -295,7 +311,7 @@ TEST_F(RegistryControlledDomainTest, TestDefaultData) {
} }
TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) { TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) {
UseDomainData(Perfect_Hash_Test1::FindDomain, Perfect_Hash_Test1_stringpool); UseDomainData(test1::kDafsa);
// Testing the same dataset for INCLUDE_PRIVATE_REGISTRIES and // Testing the same dataset for INCLUDE_PRIVATE_REGISTRIES and
// EXCLUDE_PRIVATE_REGISTRIES arguments. // EXCLUDE_PRIVATE_REGISTRIES arguments.
...@@ -347,6 +363,138 @@ TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) { ...@@ -347,6 +363,138 @@ TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) {
INCLUDE_UNKNOWN_REGISTRIES)); INCLUDE_UNKNOWN_REGISTRIES));
} }
TEST_F(RegistryControlledDomainTest, TestDafsaTwoByteOffsets) {
  // Looks up keys in a DAFSA that needs two byte offsets.
  // Every label in this DAFSA begins and ends with a unique character, so no
  // labels can be merged. Each inner node is about 100 bytes, whereas a one
  // byte offset can at most add 64 bytes to the previous offset, so every
  // path has to go over two byte offsets.
  UseDomainData(test3::kDafsa);

  const char* const listed_key =
      "a.b.6____________________________________________________"
      "________________________________________________6";
  const char* const private_key =
      "a.b.7____________________________________________________"
      "________________________________________________7";
  const char* const unlisted_key =
      "a.b.a____________________________________________________"
      "________________________________________________8";

  EXPECT_EQ(102U,
            GetRegistryLengthFromHost(listed_key, EXCLUDE_UNKNOWN_REGISTRIES));

  // The private entry only counts when private registries are included.
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_key, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(102U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_key, EXCLUDE_UNKNOWN_REGISTRIES));

  EXPECT_EQ(
      0U, GetRegistryLengthFromHost(unlisted_key, EXCLUDE_UNKNOWN_REGISTRIES));
}
TEST_F(RegistryControlledDomainTest, TestDafsaThreeByteOffsets) {
  // Looks up keys in a DAFSA that needs three byte offsets.
  // Every label in this DAFSA begins and ends with unique characters, so no
  // labels can be merged. The byte array has a size of ~54k, while a two byte
  // offset can add at most 8k to the previous offset. Because offsets only
  // skip forward in memory, the nodes holding the return values must sit
  // near the end of the byte array. The probability of reaching a return
  // value from an arbitrary inner node without a three byte offset is small
  // (but not zero). The lookup is therefore repeated with several keys, so
  // that with reasonable probability at least one tested path has to go over
  // a three byte offset.
  UseDomainData(test4::kDafsa);

  const char* const listed_key =
      "a.b.Z6___________________________________________________"
      "_________________________________________________Z6";
  const char* const private_key =
      "a.b.Z7___________________________________________________"
      "_________________________________________________Z7";
  const char* const unlisted_key =
      "a.b.Za___________________________________________________"
      "_________________________________________________Z8";

  EXPECT_EQ(104U,
            GetRegistryLengthFromHost(listed_key, EXCLUDE_UNKNOWN_REGISTRIES));

  // The private entry only counts when private registries are included.
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_key, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(104U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_key, EXCLUDE_UNKNOWN_REGISTRIES));

  EXPECT_EQ(
      0U, GetRegistryLengthFromHost(unlisted_key, EXCLUDE_UNKNOWN_REGISTRIES));
}
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedPrefixes) {
  // Looks up keys in a DAFSA with compressed prefixes.
  // The words behind this DAFSA share prefixes but have distinct suffixes,
  // so the DAFSA forms a trie rooted at the implicit source node.
  UseDomainData(test5::kDafsa);

  const char* const listed_ai = "a.b.ai";
  const char* const private_bj = "a.b.bj";
  const char* const listed_aak = "a.b.aak";
  const char* const private_bbl = "a.b.bbl";
  const char* const unlisted_aaa = "a.b.aaa";
  const char* const unlisted_bbb = "a.b.bbb";
  const char* const listed_aaaam = "a.b.aaaam";
  const char* const listed_bbbbn = "a.b.bbbbn";

  // Two character labels.
  EXPECT_EQ(2U,
            GetRegistryLengthFromHost(listed_ai, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_bj, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(2U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_bj, EXCLUDE_UNKNOWN_REGISTRIES));

  // Three character labels.
  EXPECT_EQ(3U,
            GetRegistryLengthFromHost(listed_aak, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_bbl, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(3U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_bbl, EXCLUDE_UNKNOWN_REGISTRIES));

  // Labels that only consist of a shared prefix must not match.
  EXPECT_EQ(0U,
            GetRegistryLengthFromHostIncludingPrivate(
                unlisted_aaa, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHostIncludingPrivate(
                unlisted_bbb, EXCLUDE_UNKNOWN_REGISTRIES));

  // Five character labels.
  EXPECT_EQ(
      5U, GetRegistryLengthFromHost(listed_aaaam, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(
      5U, GetRegistryLengthFromHost(listed_bbbbn, EXCLUDE_UNKNOWN_REGISTRIES));
}
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedSuffixes) {
  // Looks up keys in a DAFSA with compressed suffixes.
  // The words behind this DAFSA share suffixes but have distinct prefixes,
  // so the DAFSA forms a trie rooted at the implicit sink node.
  UseDomainData(test6::kDafsa);

  const char* const listed_ia = "a.b.ia";
  const char* const private_jb = "a.b.jb";
  const char* const listed_kaa = "a.b.kaa";
  const char* const private_lbb = "a.b.lbb";
  const char* const unlisted_aaa = "a.b.aaa";
  const char* const unlisted_bbb = "a.b.bbb";
  const char* const listed_maaaa = "a.b.maaaa";
  const char* const listed_nbbbb = "a.b.nbbbb";

  // Two character labels.
  EXPECT_EQ(2U,
            GetRegistryLengthFromHost(listed_ia, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_jb, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(2U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_jb, EXCLUDE_UNKNOWN_REGISTRIES));

  // Three character labels.
  EXPECT_EQ(3U,
            GetRegistryLengthFromHost(listed_kaa, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHost(private_lbb, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(3U,
            GetRegistryLengthFromHostIncludingPrivate(
                private_lbb, EXCLUDE_UNKNOWN_REGISTRIES));

  // Labels that only consist of a shared suffix must not match.
  EXPECT_EQ(0U,
            GetRegistryLengthFromHostIncludingPrivate(
                unlisted_aaa, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetRegistryLengthFromHostIncludingPrivate(
                unlisted_bbb, EXCLUDE_UNKNOWN_REGISTRIES));

  // Five character labels.
  EXPECT_EQ(
      5U, GetRegistryLengthFromHost(listed_maaaa, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(
      5U, GetRegistryLengthFromHost(listed_nbbbb, EXCLUDE_UNKNOWN_REGISTRIES));
}
} // namespace registry_controlled_domains } // namespace registry_controlled_domains
} // namespace net } // namespace net
...@@ -44,6 +44,42 @@ ...@@ -44,6 +44,42 @@
'net.gypi', 'net.gypi',
], ],
'targets': [ 'targets': [
# Target that turns each listed .gperf file into a generated "-inc.cc" file
# by running make_dafsa.py. It has type 'none', so it only produces the
# generated sources.
{
'target_name': 'net_derived_sources',
'type': 'none',
# Inputs for the 'dafsa' rule below: the real domain list plus the six
# unittest lists.
'sources': [
'base/registry_controlled_domains/effective_tld_names.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest1.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest2.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest3.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest4.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest5.gperf',
'base/registry_controlled_domains/effective_tld_names_unittest6.gperf',
],
'rules': [
{
# Applied to every source with the 'gperf' extension.
'rule_name': 'dafsa',
'extension': 'gperf',
# One output per input, named after the input's root name and placed
# under the shared intermediate directory.
'outputs': [
'<(SHARED_INTERMEDIATE_DIR)/net/<(RULE_INPUT_DIRNAME)/<(RULE_INPUT_ROOT)-inc.cc',
],
# Listing the generator script as an input makes the outputs depend on it.
'inputs': [
'tools/tld_cleanup/make_dafsa.py',
],
# Command line: python make_dafsa.py <input .gperf> <output -inc.cc>.
'action': [
'python',
'tools/tld_cleanup/make_dafsa.py',
'<(RULE_INPUT_PATH)',
'<(SHARED_INTERMEDIATE_DIR)/net/<(RULE_INPUT_DIRNAME)/<(RULE_INPUT_ROOT)-inc.cc',
],
},
],
# Targets depending directly on this one get the shared intermediate
# directory added to their include path.
'direct_dependent_settings': {
'include_dirs': [
'<(SHARED_INTERMEDIATE_DIR)'
],
},
},
{ {
'target_name': 'net', 'target_name': 'net',
'type': '<(component)', 'type': '<(component)',
...@@ -58,6 +94,7 @@ ...@@ -58,6 +94,7 @@
'../third_party/icu/icu.gyp:icuuc', '../third_party/icu/icu.gyp:icuuc',
'../third_party/zlib/zlib.gyp:zlib', '../third_party/zlib/zlib.gyp:zlib',
'../url/url.gyp:url_lib', '../url/url.gyp:url_lib',
'net_derived_sources',
'net_resources', 'net_resources',
], ],
'sources': [ 'sources': [
...@@ -503,7 +540,8 @@ ...@@ -503,7 +540,8 @@
'../url/url.gyp:url_lib', '../url/url.gyp:url_lib',
'http_server', 'http_server',
'net', 'net',
'net_test_support' 'net_derived_sources',
'net_test_support',
], ],
'sources': [ 'sources': [
'<@(net_test_sources)', '<@(net_test_sources)',
......
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Chromium presubmit script for src/net/tools/tld_cleanup."""
def _RunMakeDafsaTests(input_api, output_api):
  """Runs unittest for make_dafsa if any related file has been modified.

  Args:
    input_api: presubmit input API object; provides the changed local paths,
        the presubmit directory, the python executable and the test runner.
    output_api: presubmit output API object; supplies the result type used
        to report a failing test run.

  Returns:
    A list of presubmit results. The list is empty when neither make_dafsa.py
    nor its unittest is part of the change.
  """
  files = ('net/tools/tld_cleanup/make_dafsa.py',
           'net/tools/tld_cleanup/make_dafsa_unittest.py')
  local_paths = input_api.LocalPaths()
  if not any(f in local_paths for f in files):
    # Presubmit entry points have to return a list or tuple; a bare return
    # would hand None back to the presubmit framework.
    return []
  test_path = input_api.os_path.join(input_api.PresubmitLocalPath(),
                                     'make_dafsa_unittest.py')
  cmd_name = 'make_dafsa_unittest'
  cmd = [input_api.python_executable, test_path]
  test_cmd = input_api.Command(
      name=cmd_name,
      cmd=cmd,
      kwargs={},
      message=output_api.PresubmitPromptWarning)
  return input_api.RunTests([test_cmd])
def CheckChangeOnUpload(input_api, output_api):
  """Upload-time check: forwards to the make_dafsa unittest runner."""
  upload_results = _RunMakeDafsaTests(input_api, output_api)
  return upload_results
def CheckChangeOnCommit(input_api, output_api):
  """Presubmit entry point for commits: runs the make_dafsa unittest check."""
  commit_results = _RunMakeDafsaTests(input_api, output_api)
  return commit_results
...@@ -20,12 +20,9 @@ When updating src/net/base/registry_controlled_domains/effective_tld_names.dat: ...@@ -20,12 +20,9 @@ When updating src/net/base/registry_controlled_domains/effective_tld_names.dat:
src/build/Debug. It will re-generate src/build/Debug. It will re-generate
src/net/base/registry_controlled_domains/effective_tld_names.gperf. src/net/base/registry_controlled_domains/effective_tld_names.gperf.
6. Run gperf on the new effective_tld_names.gperf: 6. Check in the updated effective_tld_names.dat, effective_tld_names.gperf
pushd src/net/base/registry_controlled_domains;
gperf -a -L "C++" -C -c -o -t -k '*' -NFindDomain -P -K name_offset -D -m 10 \
effective_tld_names.gperf > effective_tld_names.cc;
popd;
It will produce a new effective_tld_names.cc.
7. Check in the updated effective_tld_names.dat, effective_tld_names.gperf, Note that gperf is no longer used for effective_tld_names, but when building
and effective_tld_names.cc together. chromium the file effective_tld_names.gperf will be parsed by make_dafsa.py
to generate the file effective_tld_names-inc.cc, which is included in
registry_controlled_domain.cc
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
A Deterministic acyclic finite state automaton (DAFSA) is a compact
representation of an unordered word list (dictionary).
http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
This python program converts a list of strings to a byte array in C++.
This python program fetches strings and return values from a gperf file
and generates a C++ file with a byte array representing a graph that can be
used as a memory efficient replacement for the perfect hash table.
The input strings are assumed to consist of printable 7-bit ASCII characters
and the return values are assumed to be one digit integers.
In this program a DAFSA is a diamond shaped graph starting at a common
source node and ending at a common sink node. All internal nodes contain
a label and each word is represented by the labels in one path from
the source node to the sink node.
The following python representation is used for nodes:
Source node: [ children ]
Internal node: (label, [ children ])
Sink node: None
The graph is first compressed by prefixes like a trie. In the next step
suffixes are compressed so that the graph gets diamond shaped. Finally
one to one linked nodes are replaced by nodes with the labels joined.
The order of the operations is crucial since lookups will be performed
starting from the source with no backtracking. Thus a node must have at
most one child with a label starting by the same character. The output
is also arranged so that all jumps are to increasing addresses, thus forward
in memory.
The generated output has suffix free decoding so that the sign of leading
bits in a link (a reference to a child node) indicate if it has a size of one,
two or three bytes and if it is the last outgoing link from the actual node.
A node label is terminated by a byte with the leading bit set.
The generated byte array can be described by the following BNF:
<byte> ::= < 8-bit value in range [0x00-0xFF] >
<char> ::= < printable 7-bit ASCII character, byte in range [0x20-0x7F] >
<end_char> ::= < char + 0x80, byte in range [0xA0-0xFF] >
<return_value> ::= < value + 0x80, byte in range [0x80-0x8F] >
<offset1> ::= < byte in range [0x00-0x3F] >
<offset2> ::= < byte in range [0x40-0x5F] >
<offset3> ::= < byte in range [0x60-0x7F] >
<end_offset1> ::= < byte in range [0x80-0xBF] >
<end_offset2> ::= < byte in range [0xC0-0xDF] >
<end_offset3> ::= < byte in range [0xE0-0xFF] >
<prefix> ::= <char>
<label> ::= <end_char>
| <char> <label>
<end_label> ::= <return_value>
| <char> <end_label>
<offset> ::= <offset1>
| <offset2> <byte>
| <offset3> <byte> <byte>
<end_offset> ::= <end_offset1>
| <end_offset2> <byte>
| <end_offset3> <byte> <byte>
<offsets> ::= <end_offset>
| <offset> <offsets>
<source> ::= <offsets>
<node> ::= <label> <offsets>
| <prefix> <node>
| <end_label>
<dafsa> ::= <source>
| <dafsa> <node>
Decoding:
<char> -> printable 7-bit ASCII character
<end_char> & 0x7F -> printable 7-bit ASCII character
<return_value> & 0x0F -> integer
<offset1> & 0x3F -> integer
((<offset2> & 0x1F) << 8) + <byte> -> integer
((<offset3> & 0x1F) << 16) + (<byte> << 8) + <byte> -> integer
end_offset1, end_offset2 and end_offset3 are decoded the same way as offset1,
offset2 and offset3 respectively.
The first offset in a list of offsets is the distance in bytes between the
offset itself and the first child node. Subsequent offsets are the distance
between previous child node and next child node. Thus each offset links a node
to a child node. The distance is always counted between start addresses, i.e.
first byte in decoded offset or first byte in child node.
Example 1:
%%
aa, 1
a, 2
%%
The input is first parsed to a list of words:
["aa1", "a2"]
A fully expanded graph is created from the words:
source = [node1, node4]
node1 = ("a", [node2])
node2 = ("a", [node3])
node3 = ("\x01", [sink])
node4 = ("a", [node5])
node5 = ("\x02", [sink])
sink = None
Compression results in the following graph:
source = [node1]
node1 = ("a", [node2, node3])
node2 = ("\x02", [sink])
node3 = ("a\x01", [sink])
sink = None
A C++ representation of the compressed graph is generated:
const unsigned char dafsa[7] = {
0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81,
};
The bytes in the generated array have the following meaning:
0: 0x81 <end_offset1> child at position 0 + (0x81 & 0x3F) -> jump to 1
1: 0xE1 <end_char> label character (0xE1 & 0x7F) -> match "a"
2: 0x02 <offset1> child at position 2 + (0x02 & 0x3F) -> jump to 4
3: 0x81 <end_offset1> child at position 4 + (0x81 & 0x3F) -> jump to 5
4: 0x82 <return_value> 0x82 & 0x0F -> return 2
5: 0x61 <char> label character 0x61 -> match "a"
6: 0x81 <return_value> 0x81 & 0x0F -> return 1
Example 2:
%%
aa, 1
bbb, 2
baa, 1
%%
The input is first parsed to a list of words:
["aa1", "bbb2", "baa1"]
Compression results in the following graph:
source = [node1, node2]
node1 = ("b", [node2, node3])
node2 = ("aa\x01", [sink])
node3 = ("bb\x02", [sink])
sink = None
A C++ representation of the compressed graph is generated:
const unsigned char dafsa[11] = {
0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62, 0x62, 0x82,
};
The bytes in the generated array have the following meaning:
0: 0x02 <offset1> child at position 0 + (0x02 & 0x3F) -> jump to 2
1: 0x83 <end_offset1> child at position 2 + (0x83 & 0x3F) -> jump to 5
2: 0xE2 <end_char> label character (0xE2 & 0x7F) -> match "b"
3: 0x02 <offset1> child at position 3 + (0x02 & 0x3F) -> jump to 5
4: 0x83 <end_offset1> child at position 5 + (0x83 & 0x3F) -> jump to 8
5: 0x61 <char> label character 0x61 -> match "a"
6: 0x61 <char> label character 0x61 -> match "a"
7: 0x81 <return_value> 0x81 & 0x0F -> return 1
8: 0x62 <char> label character 0x62 -> match "b"
9: 0x62 <char> label character 0x62 -> match "b"
10: 0x82 <return_value> 0x82 & 0x0F -> return 2
"""
import sys
class InputError(Exception):
  """Raised when the input file or word list is malformed."""
def to_dafsa(words):
  """Builds a fully expanded DAFSA from a word list and returns its source.

  Every character of every word becomes a node of its own, chained to the
  next character. The last character of a word (the return value digit) is
  stored masked with 0x0F and links to the sink (None).

  Raises:
    InputError: if the word list is empty or a word contains a character
        outside the range 0x20-0x7F.
  """
  if not words:
    raise InputError('The domain list must not be empty')

  def build_chain(word):
    """Turns one word into a chain of single character nodes."""
    for char in word:
      if ord(char) <= 0x1F or ord(char) >= 0x80:
        raise InputError('Domain names must be printable 7-bit ASCII')
    # Build from the tail towards the head so each node can refer to the
    # node that follows it.
    chain = (chr(ord(word[-1]) & 0x0F), [None])
    for char in reversed(word[:-1]):
      chain = (char, [chain])
    return chain

  return [build_chain(word) for word in words]
def to_words(node):
  """Lists every word spelled by the paths from an internal node to the sink.

  The sink (None) expands to a list holding just the empty string.
  """
  if not node:
    return ['']
  label = node[0]
  expanded = []
  for child in node[1]:
    for suffix in to_words(child):
      expanded.append(label + suffix)
  return expanded
def reverse(dafsa):
  """Returns a reversed copy of the DAFSA.

  Every edge is flipped and every label is spelled backwards, so the old sink
  takes the role of the source. The returned list holds the new source's
  children.
  """
  new_sources = []
  mirrored = {}

  def visit(old_node, new_child):
    """Mirrors old_node and registers new_child as one of its children.

    new_child is the mirror of the old parent through which old_node was
    reached (None when old_node was reached from the old source).
    """
    if not old_node:
      # Reached the old sink: the mirrored parent hangs off the new source.
      new_sources.append(new_child)
      return
    key = id(old_node)
    twin = mirrored.get(key)
    if twin is not None:
      # Already mirrored via another parent; just record the extra edge.
      twin[1].append(new_child)
      return
    twin = (old_node[0][::-1], [new_child])
    mirrored[key] = twin
    for old_child in old_node[1]:
      visit(old_child, twin)

  for root in dafsa:
    visit(root, None)
  return new_sources
def join_labels(dafsa):
  """Returns a new DAFSA in which one to one linked nodes are merged.

  A node is merged with its child when the child is its only child and the
  node is that child's only parent; the merged node carries both labels
  concatenated.
  """
  # The sink starts with a count of two so that it is never merged.
  incoming = { id(None): 2 }
  merged_by_id = { id(None): None }

  def tally(node):
    """Counts one more reference to node; descends on the first visit."""
    key = id(node)
    first_visit = key not in incoming
    incoming[key] = incoming.get(key, 0) + 1
    if first_visit:
      for child in node[1]:
        tally(child)

  def merged(node):
    """Returns the (memoized) replacement for node."""
    key = id(node)
    if key in merged_by_id:
      return merged_by_id[key]
    new_children = [merged(child) for child in node[1]]
    if len(new_children) == 1 and incoming[id(node[1][0])] == 1:
      only_child = new_children[0]
      replacement = (node[0] + only_child[0], only_child[1])
    else:
      replacement = (node[0], new_children)
    merged_by_id[key] = replacement
    return replacement

  for root in dafsa:
    tally(root)
  return [merged(root) for root in dafsa]
def join_suffixes(dafsa):
  """Returns a new DAFSA in which nodes spelling the same set of words
  towards the sink are replaced by one shared node.
  """
  # The sink spells exactly the empty word and is represented by None.
  canonical = { frozenset(('',)): None }

  def dedupe(node):
    """Returns the shared node matching node, creating it when none exists.

    The graph is walked depth first.
    """
    signature = frozenset(to_words(node))
    if signature in canonical:
      return canonical[signature]
    replacement = (node[0], [dedupe(child) for child in node[1]])
    canonical[signature] = replacement
    return replacement

  return [dedupe(root) for root in dafsa]
def top_sort(dafsa):
  """Returns the nodes of the DAFSA in a topological order (parents first)."""
  pending = {}

  def register(node):
    """Counts a reference to node; descends only on the first visit."""
    if not node:
      return
    key = id(node)
    if key in pending:
      pending[key] += 1
      return
    pending[key] = 1
    for child in node[1]:
      register(child)

  for root in dafsa:
    register(root)

  # The reference held by the source list itself does not count as an edge.
  for root in dafsa:
    pending[id(root)] -= 1

  ready = [root for root in dafsa if pending[id(root)] == 0]
  ordered = []
  while ready:
    current = ready.pop()
    assert pending[id(current)] == 0
    ordered.append(current)
    for child in current[1]:
      if not child:
        continue
      pending[id(child)] -= 1
      if pending[id(child)] == 0:
        ready.append(child)
  return ordered
def encode_links(children, offsets, current):
  """Encodes a list of children as one, two or three byte offsets.

  Args:
    children: non-empty list of child nodes; [None] means the only child is
        the sink.
    offsets: dict mapping id(child) to the position stored for that child.
    current: number of bytes written before these links.

  Returns:
    The encoded links as a list of byte values, or an empty list when the
    only child is the sink.
  """
  # Check for an empty list before the first element is inspected.
  assert children
  if not children[0]:
    # This is an <end_label> node and no links follow such nodes
    assert len(children) == 1
    return []
  guess = 3 * len(children)
  children = sorted(children, key = lambda x: -offsets[id(x)])
  # The distances depend on how many bytes the links themselves occupy, so
  # start from the largest possible size and re-encode until the assumed
  # size matches the size actually produced.
  while True:
    offset = current + guess
    buf = []
    for child in children:
      last = len(buf)
      distance = offset - offsets[id(child)]
      assert distance > 0 and distance < (1 << 21)

      if distance < (1 << 6):
        # A 6-bit offset: "s0xxxxxx"
        buf.append(distance)
      elif distance < (1 << 13):
        # A 13-bit offset: "s10xxxxxxxxxxxxx"
        buf.append(0x40 | (distance >> 8))
        buf.append(distance & 0xFF)
      else:
        # A 21-bit offset: "s11xxxxxxxxxxxxxxxxxxxxx"
        buf.append(0x60 | (distance >> 16))
        buf.append((distance >> 8) & 0xFF)
        buf.append(distance & 0xFF)
      # Distance in first link is relative to following record.
      # Distance in other links are relative to previous link.
      offset -= distance
    if len(buf) == guess:
      break
    guess = len(buf)
  # Set most significant bit to mark end of links in this node.
  buf[last] |= (1 << 7)
  buf.reverse()
  return buf
def encode_prefix(label):
  """Encodes a node label as a list of bytes without a trailing high byte.

  Used for a node that has exactly one child which follows immediately after
  it, so that no jump is needed; the label then acts as a prefix to the label
  of the child node. The bytes are returned in reversed character order.
  """
  assert label
  encoded = [ord(char) for char in label]
  encoded.reverse()
  return encoded
def encode_label(label):
  """Encodes a node label as a list of bytes in which the byte of the last
  character has its high bit set.
  """
  encoded = encode_prefix(label)
  # The list is in reversed character order, so index 0 is the last character
  # of the label; its most significant bit marks the end of the label.
  encoded[0] = encoded[0] | 0x80
  return encoded
def encode(dafsa):
  """Encodes a DAFSA to a list of bytes.

  Nodes are written in reverse topological order and the whole stream is
  reversed at the end, so the links of the source come first in the result.
  """
  stream = []
  end_positions = {}
  for node in reversed(top_sort(dafsa)):
    label, children = node[0], node[1]
    only_child = children[0] if len(children) == 1 else None
    if only_child and end_positions[id(only_child)] == len(stream):
      # The single child was written immediately before this node, so the
      # label can be emitted as a plain prefix without any link.
      stream += encode_prefix(label)
    else:
      stream += encode_links(children, end_positions, len(stream))
      stream += encode_label(label)
    end_positions[id(node)] = len(stream)
  stream += encode_links(dafsa, end_positions, len(stream))
  return stream[::-1]
def to_cxx(data):
  """Generates C++ code from a list of encoded bytes.

  The result is a comment header followed by the definition of the array
  kDafsa, with at most twelve byte values per line.
  """
  pieces = [
      '/* This file is generated. DO NOT EDIT!\n\n',
      'The byte array encodes effective tld names. See make_dafsa.py for',
      ' documentation.',
      '*/\n\n',
      'const unsigned char kDafsa[%s] = {\n' % len(data),
  ]
  for start in range(0, len(data), 12):
    row = data[start:start + 12]
    pieces.append(' ' + ', '.join('0x%02x' % byte for byte in row) + ',\n')
  pieces.append('};\n')
  return ''.join(pieces)
def words_to_cxx(words):
  """Generates C++ code from a word list.

  The expanded graph is reversed and suffix-joined twice (the second round
  works on the graph in its original direction again), then one to one
  linked nodes are joined before the graph is encoded.
  """
  graph = to_dafsa(words)
  graph = join_suffixes(reverse(graph))
  graph = join_suffixes(reverse(graph))
  graph = join_labels(graph)
  return to_cxx(encode(graph))
def parse_gperf(infile):
  """Parses gperf file and extract strings and return code.

  Args:
    infile: iterable of lines (an open file or a list of strings).

  Returns:
    A list of words, each being a domain name with its one digit return
    value appended, e.g. 'apa, 1' becomes 'apa1'.

  Raises:
    InputError: if the two '%%' delimiter lines are missing, if a line is not
        of the form "domainname, <digit>", or if the digit is not one of
        0, 1, 2 and 4.
  """
  lines = [line.strip() for line in infile]
  # Extract strings after the first '%%' and before the second '%%'.
  try:
    begin = lines.index('%%') + 1
    end = lines.index('%%', begin)
  except ValueError:
    # Report a missing delimiter like every other malformed input.
    raise InputError('Expected the domain list to be enclosed by two "%%" '
                     'lines')
  lines = lines[begin:end]
  for line in lines:
    if line[-3:-1] != ', ':
      raise InputError('Expected "domainname, <digit>", found "%s"' % line)
    # Technically the DAFSA format could support return values in range [0-31],
    # but the values below are the only with a defined meaning.
    if line[-1] not in '0124':
      raise InputError('Expected value to be one of {0,1,2,4}, found "%s"' %
                       line[-1])
  return [line[:-3] + line[-1] for line in lines]
def main():
  """Converts the gperf file named by argv[1] into the C++ file argv[2].

  Returns:
    0 on success, 1 (after printing a usage line) when the number of command
    line arguments is wrong.
  """
  arguments = sys.argv[1:]
  if len(arguments) == 2:
    source_path, target_path = arguments
    with open(source_path, 'r') as infile, open(target_path, 'w') as outfile:
      outfile.write(words_to_cxx(parse_gperf(infile)))
    return 0
  print('usage: %s infile outfile' % sys.argv[0])
  return 1
# When run as a script, the return value of main() becomes the exit status.
if __name__ == '__main__':
  sys.exit(main())
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import sys
import unittest
import make_dafsa
class ParseGperfTest(unittest.TestCase):
  """Tests for make_dafsa.parse_gperf."""

  def testMalformedKey(self):
    """Tests exception is thrown at bad format."""
    # An empty line holds neither a key nor a value.
    infile1 = [ '%%', '', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile1)

    # The space after the comma is missing.
    infile2 = [ '%%', 'apa,1', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile2)

    # Two spaces after the comma; exactly one is required. (With a single
    # space this line is well formed, see testOneWord.)
    infile3 = [ '%%', 'apa,  1', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile3)

  def testBadValues(self):
    """Tests exception is thrown when value is out of range."""
    infile1 = [ '%%', 'a, -1', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile1)

    infile2 = [ '%%', 'a, x', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile2)

    infile3 = [ '%%', 'a, 3', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile3)

    infile4 = [ '%%', 'a, 6', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile4)

    infile5 = [ '%%', 'a, 12', '%%' ]
    self.assertRaises(make_dafsa.InputError, make_dafsa.parse_gperf, infile5)

  def testValues(self):
    """Tests legal values are accepted."""
    infile1 = [ '%%', 'a, 0', '%%' ]
    words1 = [ 'a0' ]
    self.assertEqual(make_dafsa.parse_gperf(infile1), words1)

    infile2 = [ '%%', 'a, 1', '%%' ]
    words2 = [ 'a1' ]
    self.assertEqual(make_dafsa.parse_gperf(infile2), words2)

    infile3 = [ '%%', 'a, 2', '%%' ]
    words3 = [ 'a2' ]
    self.assertEqual(make_dafsa.parse_gperf(infile3), words3)

    infile4 = [ '%%', 'a, 4', '%%' ]
    words4 = [ 'a4' ]
    self.assertEqual(make_dafsa.parse_gperf(infile4), words4)

  def testOneWord(self):
    """Tests a single key can be parsed."""
    infile = [ '%%', 'apa, 1', '%%' ]
    words = [ 'apa1' ]
    self.assertEqual(make_dafsa.parse_gperf(infile), words)

  def testTwoWords(self):
    """Tests a sequence of keys can be parsed."""
    infile = [ '%%', 'apa, 1', 'bepa.com, 2', '%%' ]
    words = [ 'apa1', 'bepa.com2' ]
    self.assertEqual(make_dafsa.parse_gperf(infile), words)
class ToDafsaTest(unittest.TestCase):
  """Tests for make_dafsa.to_dafsa."""

  def testEmptyInput(self):
    """Tests exception is thrown at empty input."""
    self.assertRaises(make_dafsa.InputError, make_dafsa.to_dafsa, ())

  def testNonASCII(self):
    """Tests exception is thrown if illegal characters are used."""
    illegal_inputs = (
        ( chr(0x1F) + 'a1', ),
        ( 'a' + chr(0x1F) + '1', ),
        ( chr(0x80) + 'a1', ),
        ( 'a' + chr(0x80) + '1', ),
    )
    for illegal_words in illegal_inputs:
      self.assertRaises(make_dafsa.InputError, make_dafsa.to_dafsa,
                        illegal_words)

  def testChar(self):
    """Tests a DAFSA can be created from a single character domain name."""
    value = ( chr(0), [ None ] )
    expected = [ ( 'a', [ value ] ) ]
    self.assertEqual(make_dafsa.to_dafsa([ 'a0' ]), expected)

  def testChars(self):
    """Tests a DAFSA can be created from a multi character domain name."""
    value = ( chr(0), [ None ] )
    second = ( 'b', [ value ] )
    expected = [ ( 'a', [ second ] ) ]
    self.assertEqual(make_dafsa.to_dafsa([ 'ab0' ]), expected)

  def testWords(self):
    """Tests a DAFSA can be created from a sequence of domain names."""
    first_word = ( 'a', [ ( chr(0), [ None ] ) ] )
    second_word = ( 'b', [ ( chr(1), [ None ] ) ] )
    expected = [ first_word, second_word ]
    self.assertEqual(make_dafsa.to_dafsa([ 'a0', 'b1' ]), expected)
class ToWordsTest(unittest.TestCase):
  """Tests for make_dafsa.to_words."""

  def testSink(self):
    """Tests the sink is expanded to a list with an empty string."""
    self.assertEqual(make_dafsa.to_words(None), [ '' ])

  def testSingleNode(self):
    """Tests a single node is expanded to a list with the label string."""

    # 'ab' -> [ 'ab' ]

    only = ( 'ab', [ None ] )
    self.assertEqual(make_dafsa.to_words(only), [ 'ab' ])

  def testChain(self):
    """Tests a sequence of nodes are properly expanded."""

    # 'ab' -> 'cd' => [ 'abcd' ]

    tail = ( 'cd', [ None ] )
    head = ( 'ab', [ tail ] )
    self.assertEqual(make_dafsa.to_words(head), [ 'abcd' ])

  def testInnerTerminator(self):
    """Tests a sequence with an inner terminator is expanded to two strings."""

    # 'a' -> 'b'
    #   \       => [ 'ab', 'a' ]
    #  {sink}

    tail = ( 'b', [ None ] )
    head = ( 'a', [ tail, None ] )
    self.assertEqual(make_dafsa.to_words(head), [ 'ab', 'a' ])

  def testDiamond(self):
    """Tests a diamond can be expanded to a word list."""

    #      'cd'
    #     /    \
    # 'ab'      'gh'
    #     \    /
    #      'ef'

    shared_tail = ( 'gh', [ None ] )
    lower = ( 'ef', [ shared_tail ] )
    upper = ( 'cd', [ shared_tail ] )
    head = ( 'ab', [ upper, lower ] )
    self.assertEqual(make_dafsa.to_words(head), [ 'abcdgh', 'abefgh' ])
class JoinLabelsTest(unittest.TestCase):
  """Tests for make_dafsa.join_labels."""

  def testLabel(self):
    """Tests a single label passes unchanged."""

    # 'a'  =>  'a'

    node1 = ( 'a', [ None ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_labels(source), source)

  def testInnerTerminator(self):
    """Tests a sequence with an inner terminator passes unchanged."""

    # 'a' -> 'b'    'a' -> 'b'
    #   \       =>    \
    #  {sink}        {sink}

    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, None ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_labels(source), source)

  def testLabels(self):
    """Tests a sequence of labels can be joined."""

    # 'a' -> 'b'  =>  'ab'

    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2 ] )
    source1 = [ node1 ]
    node3 = ( 'ab', [ None ] )
    source2 = [ node3 ]
    self.assertEqual(make_dafsa.join_labels(source1), source2)

  def testCompositeLabels(self):
    """Tests a sequence of multi character labels can be joined."""

    # 'ab' -> 'cd'  =>  'abcd'

    node2 = ( 'cd', [ None ] )
    node1 = ( 'ab', [ node2 ] )
    source1 = [ node1 ]
    node3 = ( 'abcd', [ None ] )
    source2 = [ node3 ]
    self.assertEqual(make_dafsa.join_labels(source1), source2)

  def testAtomicTrie(self):
    """Tests a trie formed DAFSA with atomic labels passes unchanged."""

    #      'b'          'b'
    #     /            /
    # 'a'      =>  'a'
    #     \            \
    #      'c'          'c'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, node3 ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_labels(source), source)

  def testReverseAtomicTrie(self):
    """Tests a reverse trie formed DAFSA with atomic labels passes unchanged."""

    # 'a'           'a'
    #    \             \
    #     'c'  =>       'c'
    #    /             /
    # 'b'           'b'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ node3 ] )
    node1 = ( 'a', [ node3 ] )
    source = [ node1, node2 ]
    self.assertEqual(make_dafsa.join_labels(source), source)

  def testChainedTrie(self):
    """Tests a trie formed DAFSA with chained labels can be joined."""

    #             'c' -> 'd'           'cd'
    #            /                    /
    # 'a' -> 'b'              =>  'ab'
    #            \                    \
    #             'e' -> 'f'           'ef'

    node6 = ( 'f', [ None ] )
    node5 = ( 'e', [ node6 ] )
    node4 = ( 'd', [ None ] )
    node3 = ( 'c', [ node4 ] )
    node2 = ( 'b', [ node3, node5 ] )
    node1 = ( 'a', [ node2 ] )
    source1 = [ node1 ]
    node9 = ( 'ef', [ None ] )
    node8 = ( 'cd', [ None ] )
    node7 = ( 'ab', [ node8, node9 ] )
    source2 = [ node7 ]
    self.assertEqual(make_dafsa.join_labels(source1), source2)

  def testReverseChainedTrie(self):
    """Tests a reverse trie formed DAFSA with chained labels can be joined."""

    # 'a' -> 'b'                  'ab'
    #           \                     \
    #            'e' -> 'f'  =>        'ef'
    #           /                     /
    # 'c' -> 'd'                  'cd'

    node6 = ( 'f', [ None ] )
    node5 = ( 'e', [ node6 ] )
    node4 = ( 'd', [ node5 ] )
    node3 = ( 'c', [ node4 ] )
    node2 = ( 'b', [ node5 ] )
    node1 = ( 'a', [ node2 ] )
    source1 = [ node1, node3 ]
    node9 = ( 'ef', [ None ] )
    node8 = ( 'cd', [ node9 ] )
    node7 = ( 'ab', [ node9 ] )
    source2 = [ node7, node8 ]
    self.assertEqual(make_dafsa.join_labels(source1), source2)
class JoinSuffixesTest(unittest.TestCase):
  """Tests for make_dafsa.join_suffixes.

  Several tests compare the result with the input by value (to show the
  content is preserved) and then use assertIs to show that nodes which used
  to be equal copies are now one shared object.
  """

  def testSingleLabel(self):
    """Tests a single label passes unchanged."""

    # 'a'  =>  'a'

    node1 = ( 'a', [ None ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_suffixes(source), source)

  def testInnerTerminator(self):
    """Tests a sequence with an inner terminator passes unchanged."""

    # 'a' -> 'b'    'a' -> 'b'
    #   \       =>    \
    #  {sink}        {sink}

    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, None ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_suffixes(source), source)

  def testDistinctTrie(self):
    """Tests a trie formed DAFSA with distinct labels passes unchanged."""

    #      'b'          'b'
    #     /            /
    # 'a'      =>  'a'
    #     \            \
    #      'c'          'c'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, node3 ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.join_suffixes(source), source)

  def testReverseDistinctTrie(self):
    """Tests a reverse trie formed DAFSA with distinct labels passes unchanged.
    """

    # 'a'           'a'
    #    \             \
    #     'c'  =>       'c'
    #    /             /
    # 'b'           'b'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ node3 ] )
    node1 = ( 'a', [ node3 ] )
    source = [ node1, node2 ]
    self.assertEqual(make_dafsa.join_suffixes(source), source)

  def testJoinTwoHeads(self):
    """Tests two heads can be joined even if there is something else between."""

    # 'a'            ------'a'
    #               /
    # 'b'  =>  'b' /
    #             /
    # 'a'      ---
    #
    # The picture above shows that the new version should have just one
    # instance of the node with label 'a'.

    node3 = ( 'a', [ None ] )
    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ None ] )
    source1 = [ node1, node2, node3 ]
    source2 = make_dafsa.join_suffixes(source1)

    # Both versions should expand to the same content.
    self.assertEqual(source1, source2)
    # But the new version should have just one instance of 'a'.
    self.assertIs(source2[0], source2[2])

  def testJoinTails(self):
    """Tests tails can be joined."""

    # 'a' -> 'c'      'a'
    #                    \
    #             =>      'c'
    #                    /
    # 'b' -> 'c'      'b'

    node4 = ( 'c', [ None ] )
    node3 = ( 'b', [ node4 ] )
    node2 = ( 'c', [ None ] )
    node1 = ( 'a', [ node2 ] )
    source1 = [ node1, node3 ]
    source2 = make_dafsa.join_suffixes(source1)

    # Both versions should expand to the same content.
    self.assertEqual(source1, source2)
    # But the new version should have just one tail.
    self.assertIs(source2[0][1][0], source2[1][1][0])

  def testMakeRecursiveTrie(self):
    """Tests recursive suffix join."""

    # 'a' -> 'e' -> 'g'     'a'
    #                          \
    #                           'e'
    #                          /   \
    # 'b' -> 'e' -> 'g'     'b'     \
    #                                \
    #                   =>            'g'
    #                                /
    # 'c' -> 'f' -> 'g'     'c'     /
    #                          \   /
    #                           'f'
    #                          /
    # 'd' -> 'f' -> 'g'     'd'

    node7 = ( 'g', [ None ] )
    node6 = ( 'f', [ node7 ] )
    node5 = ( 'e', [ node7 ] )
    node4 = ( 'd', [ node6 ] )
    node3 = ( 'c', [ node6 ] )
    node2 = ( 'b', [ node5 ] )
    node1 = ( 'a', [ node5 ] )
    source1 = [ node1, node2, node3, node4 ]
    source2 = make_dafsa.join_suffixes(source1)

    # Both versions should expand to the same content.
    self.assertEqual(source1, source2)
    # But the new version should have just one 'e'.
    self.assertIs(source2[0][1][0], source2[1][1][0])
    # And one 'f'.
    self.assertIs(source2[2][1][0], source2[3][1][0])
    # And one 'g'.
    self.assertIs(source2[0][1][0][1][0], source2[2][1][0][1][0])

  def testMakeDiamond(self):
    """Test we can join suffixes of a trie."""

    #      'b' -> 'd'         'b'
    #     /                  /   \
    # 'a'             =>  'a'     'd'
    #     \                  \   /
    #      'c' -> 'd'         'c'

    node5 = ( 'd', [ None ] )
    node4 = ( 'c', [ node5 ] )
    node3 = ( 'd', [ None ] )
    node2 = ( 'b', [ node3 ] )
    node1 = ( 'a', [ node2, node4 ] )
    source1 = [ node1 ]
    source2 = make_dafsa.join_suffixes(source1)

    # Both versions should expand to the same content.
    self.assertEqual(source1, source2)
    # But the new version should have just one 'd'.
    self.assertIs(source2[0][1][0][1][0], source2[0][1][1][1][0])

  def testJoinOneChild(self):
    """Tests that we can join some children but not all."""

    #     'c'             ----------'c'
    #    /               /         /
    # 'a'             'a'         /
    #    \               \       /
    #     'd'             'd'   /
    #           =>             /
    #     'c'                 /
    #    /                   /
    # 'b'                 'b'
    #    \                   \
    #     'e'                 'e'

    node6 = ( 'e', [ None ] )
    node5 = ( 'c', [ None ] )
    node4 = ( 'b', [ node5, node6 ] )
    node3 = ( 'd', [ None ] )
    node2 = ( 'c', [ None ] )
    node1 = ( 'a', [ node2, node3 ] )
    source1 = [ node1, node4 ]
    source2 = make_dafsa.join_suffixes(source1)

    # Both versions should expand to the same content.
    self.assertEqual(source1, source2)
    # But the new version should have just one 'c'.
    self.assertIs(source2[0][1][0], source2[1][1][0])
class ReverseTest(unittest.TestCase):
  """Tests for make_dafsa.reverse."""

  def testAtomicLabel(self):
    """Tests an atomic label passes unchanged."""

    # 'a'  =>  'a'

    node1 = ( 'a', [ None ] )
    source = [ node1 ]
    self.assertEqual(make_dafsa.reverse(source), source)

  def testLabel(self):
    """Tests that labels are reversed."""

    # 'ab'  =>  'ba'

    node1 = ( 'ab', [ None ] )
    source1 = [ node1 ]
    node2 = ( 'ba', [ None ] )
    source2 = [ node2 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)

  def testChain(self):
    """Tests that edges are reversed."""

    # 'a' -> 'b'  =>  'b' -> 'a'

    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2 ] )
    source1 = [ node1 ]
    node4 = ( 'a', [ None ] )
    node3 = ( 'b', [ node4 ] )
    source2 = [ node3 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)

  def testInnerTerminator(self):
    """Tests a sequence with an inner terminator can be reversed."""

    # 'a' -> 'b'    'b' -> 'a'
    #   \       =>        /
    #  {sink}       ------

    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, None ] )
    source1 = [ node1 ]
    node4 = ( 'a', [ None ] )
    node3 = ( 'b', [ node4 ] )
    source2 = [ node3, node4 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)

  def testAtomicTrie(self):
    """Tests a trie formed DAFSA can be reversed."""

    #      'b'      'b'
    #     /            \
    # 'a'       =>      'a'
    #     \            /
    #      'c'      'c'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ None ] )
    node1 = ( 'a', [ node2, node3 ] )
    source1 = [ node1 ]
    node6 = ( 'a', [ None ] )
    node5 = ( 'c', [ node6 ] )
    node4 = ( 'b', [ node6 ] )
    source2 = [ node4, node5 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)

  def testReverseAtomicTrie(self):
    """Tests a reverse trie formed DAFSA can be reversed."""

    # 'a'                'a'
    #    \              /
    #     'c'  =>    'c'
    #    /              \
    # 'b'                'b'

    node3 = ( 'c', [ None ] )
    node2 = ( 'b', [ node3 ] )
    node1 = ( 'a', [ node3 ] )
    source1 = [ node1, node2 ]
    node6 = ( 'b', [ None ] )
    node5 = ( 'a', [ None ] )
    node4 = ( 'c', [ node5, node6 ] )
    source2 = [ node4 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)

  def testDiamond(self):
    """Tests we can reverse both edges and nodes in a diamond."""

    #      'cd'                'dc'
    #     /    \              /    \
    # 'ab'      'gh'  =>  'hg'      'ba'
    #     \    /              \    /
    #      'ef'                'fe'

    node4 = ( 'gh', [ None ] )
    node3 = ( 'ef', [ node4 ] )
    node2 = ( 'cd', [ node4 ] )
    node1 = ( 'ab', [ node2, node3 ] )
    source1 = [ node1 ]
    node8 = ( 'ba', [ None ] )
    node7 = ( 'fe', [ node8 ] )
    node6 = ( 'dc', [ node8 ] )
    node5 = ( 'hg', [ node6, node7 ] )
    source2 = [ node5 ]
    self.assertEqual(make_dafsa.reverse(source1), source2)
class TopSortTest(unittest.TestCase):
  """Tests for make_dafsa.top_sort."""

  def testNode(self):
    """Tests a DAFSA with one node can be sorted."""

    # 'a'  =>  [ 'a' ]

    node1 = ( 'a', [ None ] )
    source = [ node1 ]
    nodes = [ node1 ]
    self.assertEqual(make_dafsa.top_sort(source), nodes)

  def testDiamond(self):
    """Tests nodes in a diamond can be sorted."""

    #      'b'
    #     /   \
    # 'a'      'd'
    #     \   /
    #      'c'

    node4 = ( 'd', [ None ] )
    node3 = ( 'c', [ node4 ] )
    node2 = ( 'b', [ node4 ] )
    node1 = ( 'a', [ node2, node3 ] )
    source = [ node1 ]
    nodes = make_dafsa.top_sort(source)
    # Every edge of the diamond must point forward in the sorted list.
    self.assertLess(nodes.index(node1), nodes.index(node2))
    self.assertLess(nodes.index(node1), nodes.index(node3))
    self.assertLess(nodes.index(node2), nodes.index(node4))
    self.assertLess(nodes.index(node3), nodes.index(node4))
class EncodePrefixTest(unittest.TestCase):
  """Tests for make_dafsa.encode_prefix."""

  def testChar(self):
    """Tests to encode a single character prefix."""
    expected = [ ord('a') ]
    self.assertEqual(make_dafsa.encode_prefix('a'), expected)

  def testChars(self):
    """Tests to encode a multi character prefix."""
    # The bytes come out in reversed character order.
    expected = [ ord('b'), ord('a') ]
    self.assertEqual(make_dafsa.encode_prefix('ab'), expected)
class EncodeLabelTest(unittest.TestCase):
  """Tests for make_dafsa.encode_label."""

  def testChar(self):
    """Tests to encode a single character label."""
    expected = [ ord('a') + 0x80 ]
    self.assertEqual(make_dafsa.encode_label('a'), expected)

  def testChars(self):
    """Tests to encode a multi character label."""
    # Reversed character order, with the high bit on the last character.
    expected = [ ord('b') + 0x80, ord('a') ]
    self.assertEqual(make_dafsa.encode_label('ab'), expected)
class EncodeLinksTest(unittest.TestCase):
  """Tests for make_dafsa.encode_links."""

  def testEndLabel(self):
    """Tests to encode link to the sink."""
    self.assertEqual(make_dafsa.encode_links([ None ], {}, 0), [])

  def testOneByteOffset(self):
    """Tests to encode a single one byte offset."""
    child = ( '', [ None ] )
    positions = { id(child) : 2 }
    current = 5
    expected = [ 132 ]
    self.assertEqual(make_dafsa.encode_links([ child ], positions, current),
                     expected)

  def testOneByteOffsets(self):
    """Tests to encode a sequence of one byte offsets."""
    child1 = ( '', [ None ] )
    child2 = ( '', [ None ] )
    positions = { id(child1) : 2, id(child2) : 1 }
    current = 5
    expected = [ 129, 5 ]
    self.assertEqual(
        make_dafsa.encode_links([ child1, child2 ], positions, current),
        expected)

  def testTwoBytesOffset(self):
    """Tests to encode a single two byte offset."""
    child = ( '', [ None ] )
    positions = { id(child) : 2 }
    current = 1005
    expected = [ 237, 195]
    self.assertEqual(make_dafsa.encode_links([ child ], positions, current),
                     expected)

  def testTwoBytesOffsets(self):
    """Tests to encode a sequence of two byte offsets."""
    child1 = ( '', [ None ] )
    child2 = ( '', [ None ] )
    child3 = ( '', [ None ] )
    positions = { id(child1) : 1002, id(child2) : 2, id(child3) : 2002 }
    current = 3005
    expected = [ 232, 195, 232, 67, 241, 67 ]
    self.assertEqual(
        make_dafsa.encode_links([ child1, child2, child3 ], positions,
                                current),
        expected)

  def testThreeBytesOffset(self):
    """Tests to encode a single three byte offset."""
    child = ( '', [ None ] )
    positions = { id(child) : 2 }
    current = 100005
    expected = [ 166, 134, 225 ]
    self.assertEqual(make_dafsa.encode_links([ child ], positions, current),
                     expected)

  def testThreeBytesOffsets(self):
    """Tests to encode a sequence of three byte offsets."""
    child1 = ( '', [ None ] )
    child2 = ( '', [ None ] )
    child3 = ( '', [ None ] )
    positions = { id(child1) : 100002, id(child2) : 2, id(child3) : 200002 }
    current = 300005
    expected = [ 160, 134, 225, 160, 134, 97, 172, 134, 97 ]
    self.assertEqual(
        make_dafsa.encode_links([ child1, child2, child3 ], positions,
                                current),
        expected)

  def testOneTwoThreeBytesOffsets(self):
    """Tests to encode offsets of different sizes."""
    child1 = ( '', [ None ] )
    child2 = ( '', [ None ] )
    child3 = ( '', [ None ] )
    positions = { id(child1) : 10003, id(child2) : 10002, id(child3) : 100002 }
    current = 300005
    expected = [ 129, 143, 95, 97, 74, 13, 99 ]
    self.assertEqual(
        make_dafsa.encode_links([ child1, child2, child3 ], positions,
                                current),
        expected)
class ExamplesTest(unittest.TestCase):
  """End to end tests built on the examples documented in make_dafsa.py."""

  def testExample1(self):
    """Tests Example 1 from make_dafsa.py."""
    gperf_lines = [ '%%', 'aa, 1', 'a, 2', '%%' ]
    expected_bytes = [ 0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81 ]
    expected_cxx = make_dafsa.to_cxx(expected_bytes)
    words = make_dafsa.parse_gperf(gperf_lines)
    self.assertEqual(make_dafsa.words_to_cxx(words), expected_cxx)

  def testExample2(self):
    """Tests Example 2 from make_dafsa.py."""
    gperf_lines = [ '%%', 'aa, 1', 'bbb, 2', 'baa, 1', '%%' ]
    expected_bytes = [ 0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62,
                       0x62, 0x82 ]
    expected_cxx = make_dafsa.to_cxx(expected_bytes)
    words = make_dafsa.parse_gperf(gperf_lines)
    self.assertEqual(make_dafsa.words_to_cxx(words), expected_cxx)
# Run all test cases in this file when it is executed as a script.
if __name__ == '__main__':
  unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment