|
| 1 | +import unittest |
| 2 | +from test.test_tools import toolsdir, imports_under_tool |
| 3 | +from test import support |
| 4 | +from test.support.hypothesis_helper import hypothesis |
| 5 | + |
| 6 | +st = hypothesis.strategies |
| 7 | +given = hypothesis.given |
| 8 | +example = hypothesis.example |
| 9 | + |
| 10 | + |
| 11 | +with imports_under_tool("unicode"): |
| 12 | + from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup |
| 13 | + |
| 14 | + |
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Hypothesis strategy generating a small fake character-name database.

    A size is drawn between *min_length* and *max_length* (inclusive), then
    that many distinct names (1 to 10 letters from the alphabet "abcd") and
    that many distinct characters are drawn.  The result is a list of
    ``(name, character)`` tuples pairing the two sets element by element.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))

    # Sets guarantee uniqueness; min_size == max_size pins both collections
    # to exactly `size` entries so that zip() drops nothing.
    name_strategy = st.text("abcd", min_size=1, max_size=10)
    unique_names = draw(st.sets(name_strategy, min_size=size, max_size=size))
    unique_chars = draw(st.sets(st.characters(), min_size=size, max_size=size))

    pairs = zip(unique_names, unique_chars)
    return list(pairs)
| 23 | + |
| 24 | + |
class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph data structure that is used
    to store the unicode character names in unicodedata. Tests ported from PyPy
    """

    def test_dawg_direct_simple(self):
        # Build a tiny DAWG by hand and check lookups in both directions.
        # NOTE(review): the entries are kept in the order the original test
        # inserted them; presumably Dawg.insert relies on that order --
        # confirm against dawg.py before reordering.
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]
        dawg = Dawg()
        for name, value in entries:
            dawg.insert(name, value)
        packed, data, inverse = dawg.finish()

        # name -> value; subTest reports every failing entry, not just the
        # first one.
        for name, value in entries:
            key = name.encode("ascii")
            with self.subTest(direction="lookup", key=key):
                self.assertEqual(lookup(packed, data, key), value)

        # Keys that were never inserted (unknown letter, strict prefix of a
        # key, key with one extra character) must raise KeyError.
        for missing in (b"b", b"catni", b"catnipp"):
            with self.subTest(direction="lookup", missing=missing):
                with self.assertRaises(KeyError):
                    lookup(packed, data, missing)

        # value -> name
        for name, value in entries:
            key = name.encode("ascii")
            with self.subTest(direction="inverse_lookup", value=value):
                self.assertEqual(inverse_lookup(packed, inverse, value), key)

        # A value that was never inserted must raise KeyError as well.
        with self.assertRaises(KeyError):
            inverse_lookup(packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        # Finishing a DAWG with no entries is expected to raise ValueError.
        dawg = Dawg()
        with self.assertRaises(ValueError):
            dawg.finish()

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        # suppress debug prints; the captured text itself is not inspected,
        # so the context manager's value is deliberately not bound.
        with support.captured_stdout():
            # it's enough to build it, building will also check the result
            build_compression_dawg(data)
0 commit comments