| """Tests to cover the Tools/i18n package""" |
| |
| import os |
| import re |
| import sys |
| import unittest |
| from textwrap import dedent |
| from pathlib import Path |
| |
| from test.support.script_helper import assert_python_ok |
| from test.test_tools import skip_if_missing, toolsdir |
| from test.support.os_helper import temp_cwd, temp_dir |
| |
| |
# NOTE(review): presumably skips this module when the Tools directory is not
# available -- confirm against test.test_tools.skip_if_missing.
skip_if_missing()

# Directory next to this file holding the snapshot inputs (*.py) and their
# expected pygettext outputs (*.pot).
DATA_DIR = Path(__file__).resolve().parent.joinpath('i18n_data')
| |
| |
def normalize_POT_file(pot):
    """Return *pot* with its volatile parts rewritten to fixed values.

    The POT creation timestamp and the charset are replaced by constants and
    the path separators on file location lines are turned into '/', which
    makes two POT files easier to compare.

    """
    # Pin the creation date to a fixed timestamp.
    pot = re.sub(
        r'"POT-Creation-Date: .+?\\n"',
        r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"',
        pot,
    )

    # Pin the charset to UTF-8 (currently there's no way to specify the
    # output charset).
    pot = re.sub(
        r'"Content-Type: text/plain; charset=.+?\\n"',
        r'"Content-Type: text/plain; charset=UTF-8\\n"',
        pot,
    )

    # File location lines ("#: path:lineno") use os.sep, which is '\' when
    # this test runs on Windows; rewrite it to '/'.
    return re.sub(r'#:.+', lambda found: found[0].replace(os.sep, '/'), pot)
| |
| |
class Test_pygettext(unittest.TestCase):
    """Tests for the pygettext.py tool"""

    # Path of the script under test; every test runs it in a subprocess.
    script = Path(toolsdir, 'i18n', 'pygettext.py')

    def get_header(self, data):
        """ utility: return the header of a .po file as a dictionary

        Blank lines, comments and msgid/msgstr lines are skipped.  Every
        other line has its surrounding double quotes removed and is split at
        the first ':' into a key and a whitespace-stripped value.  The
        escaped newline that ends each header line is left in the value.
        """
        headers = {}
        for line in data.split('\n'):
            if not line or line.startswith(('#', 'msgid', 'msgstr')):
                continue
            line = line.strip('"')
            key, val = line.split(':', 1)
            headers[key] = val.strip()
        return headers

    def get_msgids(self, data):
        """ utility: return all msgids in .po file as a list of strings

        The quoted continuation lines that follow a 'msgid' line belong to
        the same msgid; the pieces are joined with a newline character.
        """
        msgids = []
        reading_msgid = False
        cur_msgid = []
        for line in data.split('\n'):
            if reading_msgid:
                if line.startswith('"'):
                    cur_msgid.append(line.strip('"'))
                else:
                    # First non-quoted line: the current msgid is complete.
                    msgids.append('\n'.join(cur_msgid))
                    cur_msgid = []
                    reading_msgid = False
                    continue
            if line.startswith('msgid '):
                line = line[len('msgid '):]
                cur_msgid.append(line.strip('"'))
                reading_msgid = True

        # The data may end while a msgid is still being collected.
        if reading_msgid:
            msgids.append('\n'.join(cur_msgid))

        return msgids

    def assert_POT_equal(self, expected, actual):
        """Check if two POT files are equal"""
        # Show the complete diff on failure.
        self.maxDiff = None
        self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

    def extract_from_str(self, module_content, *, args=(), strict=True):
        """Return all msgids extracted from module_content.

        module_content is written to 'test.py' in a temporary working
        directory and pygettext is run on that file with the extra command
        line arguments in args.  When strict is true, the script must not
        write anything to stderr.
        """
        filename = 'test.py'
        with temp_cwd(None):
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(module_content)
            res = assert_python_ok('-Xutf8', self.script, *args, filename)
            if strict:
                self.assertEqual(res.err, b'')
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
        return self.get_msgids(data)

    def extract_docstrings_from_str(self, module_content):
        """Return all docstrings extracted from module_content."""
        return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

    def test_header(self):
        """Make sure the required fields are in the header, according to:
           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
        """
        with temp_cwd(None):
            assert_python_ok('-Xutf8', self.script)
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
            header = self.get_header(data)

            self.assertIn("Project-Id-Version", header)
            self.assertIn("POT-Creation-Date", header)
            self.assertIn("PO-Revision-Date", header)
            self.assertIn("Last-Translator", header)
            self.assertIn("Language-Team", header)
            self.assertIn("MIME-Version", header)
            self.assertIn("Content-Type", header)
            self.assertIn("Content-Transfer-Encoding", header)
            self.assertIn("Generated-By", header)

            # not clear if these should be required in POT (template) files
            #self.assertIn("Report-Msgid-Bugs-To", header)
            #self.assertIn("Language", header)

            #"Plural-Forms" is optional

    @unittest.skipIf(sys.platform.startswith('aix'),
                     'bpo-29972: broken test on AIX')
    def test_POT_Creation_Date(self):
        """ Match the date format from xgettext for POT-Creation-Date """
        from datetime import datetime
        with temp_cwd(None):
            assert_python_ok('-Xutf8', self.script)
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
            header = self.get_header(data)
            creationDate = header['POT-Creation-Date']

            # peel off the escaped newline at the end of string
            creationDate = creationDate.removesuffix('\\n')

            # This will raise if the date format does not exactly match.
            datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

    def test_funcdocstring(self):
        # Plain, raw and u-prefixed string literals are all docstrings.
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                def foo(bar):
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_funcdocstring_bytes(self):
        # A bytes literal is not a docstring and must not be extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_fstring(self):
        # An f-string is not a docstring and must not be extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar):
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring(self):
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                class C:
                    %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_classdocstring_bytes(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_classdocstring_fstring(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        class C:
            f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring(self):
        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
            with self.subTest(doc):
                msgids = self.extract_docstrings_from_str(dedent('''\
                %s
                ''' % doc))
                self.assertIn('doc', msgids)

    def test_moduledocstring_bytes(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        b"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_moduledocstring_fstring(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""doc"""
        '''))
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid(self):
        # Adjacent string literals inside _() form a single msgid.
        msgids = self.extract_docstrings_from_str(
            '''_("""doc""" r'str' u"ing")''')
        self.assertIn('docstring', msgids)

    def test_msgid_bytes(self):
        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_msgid_fstring(self):
        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])

    def test_funcdocstring_annotated_args(self):
        """ Test docstrings for functions with annotated args """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar: str):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_annotated_return(self):
        """ Test docstrings for functions with annotated return type """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar) -> str:
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_defvalue_args(self):
        """ Test docstring for functions with default arg values """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo(bar=()):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_funcdocstring_multiple_funcs(self):
        """ Test docstring extraction for multiple functions combining
        annotated args, annotated return types and default arg values
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        def foo1(bar: tuple=()) -> str:
            """doc1"""

        def foo2(bar: List[1:2]) -> (lambda x: x):
            """doc2"""

        def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
            """doc3"""
        '''))
        self.assertIn('doc1', msgids)
        self.assertIn('doc2', msgids)
        self.assertIn('doc3', msgids)

    def test_classdocstring_early_colon(self):
        """ Test docstring extraction for a class with colons occurring within
        the parentheses.
        """
        msgids = self.extract_docstrings_from_str(dedent('''\
        class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
            """doc"""
        '''))
        self.assertIn('doc', msgids)

    def test_calls_in_fstrings(self):
        # A _() call inside an f-string replacement field is extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_raw(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        rf"{_('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_nested(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"""{f'{_("foo bar")}'}"""
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_attribute(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{obj._('foo bar')}"
        '''))
        self.assertIn('foo bar', msgids)

    def test_calls_in_fstrings_with_call_on_call(self):
        # The callee is itself a call expression, not a keyword name.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{type(str)('foo bar')}"
        '''))
        self.assertNotIn('foo bar', msgids)

    def test_calls_in_fstrings_with_format(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo {bar}').format(bar='baz')}"
        '''))
        self.assertIn('foo {bar}', msgids)

    def test_calls_in_fstrings_with_wrong_input_1(self):
        # The argument of _() is an f-string, not a plain string literal.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo {bar}')}"
        '''))
        self.assertFalse([msgid for msgid in msgids if 'foo {bar}' in msgid])

    def test_calls_in_fstrings_with_wrong_input_2(self):
        # The argument of _() is a number, not a string literal.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(1)}"
        '''))
        # msgids only ever contains strings, so compare against the text
        # '1'; checking for the integer 1 could never fail.
        self.assertNotIn('1', msgids)

    def test_calls_in_fstring_with_multiple_args(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', 'bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)

    def test_calls_in_fstring_with_keyword_args(self):
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_('foo', bar='baz')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertNotIn('bar', msgids)
        self.assertNotIn('baz', msgids)

    def test_calls_in_fstring_with_partially_wrong_expression(self):
        # Only the well-formed call of the two must be extracted.
        msgids = self.extract_docstrings_from_str(dedent('''\
        f"{_(f'foo') + _('bar')}"
        '''))
        self.assertNotIn('foo', msgids)
        self.assertIn('bar', msgids)

    def test_function_and_class_names(self):
        """Test that function and class names are not mistakenly extracted."""
        msgids = self.extract_from_str(dedent('''\
        def _(x):
            pass

        def _(x="foo"):
            pass

        async def _(x):
            pass

        class _(object):
            pass
        '''))
        # Only the empty msgid of the POT header entry must be present.
        self.assertEqual(msgids, [''])

    def test_pygettext_output(self):
        """Test that the pygettext output exactly matches snapshots."""
        for input_file in DATA_DIR.glob('*.py'):
            output_file = input_file.with_suffix('.pot')
            with self.subTest(input_file=f'i18n_data/{input_file.name}'):
                # Copy the fixture byte for byte (as update_POT_snapshots()
                # does) so that pygettext reads exactly the data the
                # snapshot was generated from, whatever the locale encoding
                # and newline convention of the platform are.
                contents = input_file.read_bytes()
                with temp_cwd(None):
                    Path(input_file.name).write_bytes(contents)
                    assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
                    output = Path('messages.pot').read_text(encoding='utf-8')

                expected = output_file.read_text(encoding='utf-8')
                self.assert_POT_equal(expected, output)

    def test_files_list(self):
        """Make sure the directories are inspected for source files
           bpo-31920
        """
        text1 = 'Text to translate1'
        text2 = 'Text to translate2'
        text3 = 'Text to ignore'
        with temp_cwd(None), temp_dir(None) as sdir:
            # A module inside an ordinary package directory.
            pymod = Path(sdir, 'pypkg', 'pymod.py')
            pymod.parent.mkdir()
            pymod.write_text(f'_({text1!r})', encoding='utf-8')

            # A module inside a directory whose own name ends with '.py'.
            pymod2 = Path(sdir, 'pkg.py', 'pymod2.py')
            pymod2.parent.mkdir()
            pymod2.write_text(f'_({text2!r})', encoding='utf-8')

            # A module inside a 'CVS' directory; its text must not show up.
            pymod3 = Path(sdir, 'CVS', 'pymod3.py')
            pymod3.parent.mkdir()
            pymod3.write_text(f'_({text3!r})', encoding='utf-8')

            assert_python_ok('-Xutf8', self.script, sdir)
            data = Path('messages.pot').read_text(encoding='utf-8')
            self.assertIn(f'msgid "{text1}"', data)
            self.assertIn(f'msgid "{text2}"', data)
            self.assertNotIn(text3, data)
| |
| |
def update_POT_snapshots():
    """Regenerate the '.pot' snapshot of every '*.py' file in DATA_DIR.

    Each input is copied byte for byte into a temporary working directory,
    pygettext is run on it with --docstrings, and the normalized output is
    written next to the input with a '.pot' suffix.
    """
    for source in DATA_DIR.glob('*.py'):
        raw = source.read_bytes()
        with temp_cwd(None):
            # Run on a copy so that messages.pot lands in the scratch
            # directory and not in DATA_DIR.
            Path(source.name).write_bytes(raw)
            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', source.name)
            generated = Path('messages.pot').read_text(encoding='utf-8')

        snapshot = source.with_suffix('.pot')
        snapshot.write_text(normalize_POT_file(generated), encoding='utf-8')
| |
| |
if __name__ == '__main__':
    # To regenerate POT files, pass --snapshot-update as the first argument;
    # the snapshots are rewritten and no tests are run.
    if sys.argv[1:2] == ['--snapshot-update']:
        update_POT_snapshots()
        sys.exit(0)
    unittest.main()