| import unittest |
| from test.test_tools import skip_if_missing, imports_under_tool |
| from test import support |
| from test.support.hypothesis_helper import hypothesis |
| |
| st = hypothesis.strategies |
| given = hypothesis.given |
| example = hypothesis.example |
| |
| |
| skip_if_missing("unicode") |
| with imports_under_tool("unicode"): |
| from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup |
| |
| |
| @st.composite |
| def char_name_db(draw, min_length=1, max_length=30): |
| m = draw(st.integers(min_value=min_length, max_value=max_length)) |
| names = draw( |
| st.sets(st.text("abcd", min_size=1, max_size=10), min_size=m, max_size=m) |
| ) |
| characters = draw(st.sets(st.characters(), min_size=m, max_size=m)) |
| return list(zip(names, characters)) |
| |
| |
| class TestDawg(unittest.TestCase): |
| """Tests for the directed acyclic word graph data structure that is used |
| to store the unicode character names in unicodedata. Tests ported from PyPy |
| """ |
| |
| def test_dawg_direct_simple(self): |
| dawg = Dawg() |
| dawg.insert("a", -4) |
| dawg.insert("c", -2) |
| dawg.insert("cat", -1) |
| dawg.insert("catarr", 0) |
| dawg.insert("catnip", 1) |
| dawg.insert("zcatnip", 5) |
| packed, data, inverse = dawg.finish() |
| |
| self.assertEqual(lookup(packed, data, b"a"), -4) |
| self.assertEqual(lookup(packed, data, b"c"), -2) |
| self.assertEqual(lookup(packed, data, b"cat"), -1) |
| self.assertEqual(lookup(packed, data, b"catarr"), 0) |
| self.assertEqual(lookup(packed, data, b"catnip"), 1) |
| self.assertEqual(lookup(packed, data, b"zcatnip"), 5) |
| self.assertRaises(KeyError, lookup, packed, data, b"b") |
| self.assertRaises(KeyError, lookup, packed, data, b"catni") |
| self.assertRaises(KeyError, lookup, packed, data, b"catnipp") |
| |
| self.assertEqual(inverse_lookup(packed, inverse, -4), b"a") |
| self.assertEqual(inverse_lookup(packed, inverse, -2), b"c") |
| self.assertEqual(inverse_lookup(packed, inverse, -1), b"cat") |
| self.assertEqual(inverse_lookup(packed, inverse, 0), b"catarr") |
| self.assertEqual(inverse_lookup(packed, inverse, 1), b"catnip") |
| self.assertEqual(inverse_lookup(packed, inverse, 5), b"zcatnip") |
| self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12) |
| |
| def test_forbid_empty_dawg(self): |
| dawg = Dawg() |
| self.assertRaises(ValueError, dawg.finish) |
| |
| @given(char_name_db()) |
| @example([("abc", "a"), ("abd", "b")]) |
| @example( |
| [ |
| ("bab", "1"), |
| ("a", ":"), |
| ("ad", "@"), |
| ("b", "<"), |
| ("aacc", "?"), |
| ("dab", "D"), |
| ("aa", "0"), |
| ("ab", "F"), |
| ("aaa", "7"), |
| ("cbd", "="), |
| ("abad", ";"), |
| ("ac", "B"), |
| ("abb", "4"), |
| ("bb", "2"), |
| ("aab", "9"), |
| ("caaaaba", "E"), |
| ("ca", ">"), |
| ("bbaaa", "5"), |
| ("d", "3"), |
| ("baac", "8"), |
| ("c", "6"), |
| ("ba", "A"), |
| ] |
| ) |
| @example( |
| [ |
| ("bcdac", "9"), |
| ("acc", "g"), |
| ("d", "d"), |
| ("daabdda", "0"), |
| ("aba", ";"), |
| ("c", "6"), |
| ("aa", "7"), |
| ("abbd", "c"), |
| ("badbd", "?"), |
| ("bbd", "f"), |
| ("cc", "@"), |
| ("bb", "8"), |
| ("daca", ">"), |
| ("ba", ":"), |
| ("baac", "3"), |
| ("dbdddac", "a"), |
| ("a", "2"), |
| ("cabd", "b"), |
| ("b", "="), |
| ("abd", "4"), |
| ("adcbd", "5"), |
| ("abc", "e"), |
| ("ab", "1"), |
| ] |
| ) |
| def test_dawg(self, data): |
| # suppress debug prints |
| with support.captured_stdout() as output: |
| # it's enough to build it, building will also check the result |
| build_compression_dawg(data) |