From b2726328f86d2b12cd783a20c80204d107bdd24d Mon Sep 17 00:00:00 2001 From: Alexandre Menezes Date: Mon, 5 Feb 2024 14:28:58 -0300 Subject: [PATCH] fix in method tesseract_parameters --- aiopytesseract/base_command.py | 2 +- aiopytesseract/commands.py | 13 +++++++++++-- aiopytesseract/models/parameter.py | 14 +++++++++----- setup.cfg | 3 ++- tests/test_commands.py | 8 ++++++++ 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/aiopytesseract/base_command.py b/aiopytesseract/base_command.py index f01db7a..63faaf9 100644 --- a/aiopytesseract/base_command.py +++ b/aiopytesseract/base_command.py @@ -226,7 +226,7 @@ async def _build_cmd_args( if config: for option, value in config: cmd_args.append("-c") - cmd_args.append(f"{option}={value} ") + cmd_args.append(f"{option}={value}") extension = reversed(output_extension.split()) for ext in extension: diff --git a/aiopytesseract/commands.py b/aiopytesseract/commands.py index c275662..3e55c69 100644 --- a/aiopytesseract/commands.py +++ b/aiopytesseract/commands.py @@ -168,14 +168,23 @@ async def tesseract_parameters( raw_data: bytes = await proc.stdout.read() # type: ignore data = raw_data.decode(encoding) params = [] - for line in data.split("\n"): + # [1:] - skip first line with text: "Tesseract parameters:\n" + for line in data.split("\n")[1:]: param = re.search(r"(\w+)\s+(-?\d+.?\d*)\s+(.*)[^\n]$", line) if param: params.append( cattr.structure_attrs_fromtuple( - [param.group(1), param.group(2), param.group(3)], Parameter # type: ignore + [param.group(1), param.group(3), param.group(2)], Parameter # type: ignore ) ) + else: + param = re.search(r"(\w+)\s+(.*)[^\n]$", line) + if param: + params.append( + cattr.structure_attrs_fromtuple( + [param.group(1), param.group(2)], Parameter # type: ignore + ) + ) return sorted(params, key=lambda p: p.name) diff --git a/aiopytesseract/models/parameter.py b/aiopytesseract/models/parameter.py index 9dd7c21..073322d 100644 --- a/aiopytesseract/models/parameter.py +++ b/aiopytesseract/models/parameter.py @@ -1,8 +1,12 @@ -from dataclasses import dataclass +from typing import Union +from attr import converters, field, frozen, validators -@dataclass(frozen=True) + +@frozen class Parameter: - name: str - value: float - description: str + name: str = field(validator=validators.instance_of(str)) + description: str = field(validator=validators.instance_of(str)) + value: Union[None, str] = field( + default=None, converter=converters.default_if_none("-") # type: ignore + ) diff --git a/setup.cfg b/setup.cfg index 951a9b2..eb84e64 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ keywords = "asyncio", "ocr", "tesseract" packages = find: install_requires = aiofiles >= 0.8.0 + attrs >= 22.1.0 cattrs >= 22.1.0 python_requires = >= 3.8 @@ -90,7 +91,7 @@ disallow_untyped_decorators = True disallow_any_generics = True [tox:tox] -envlist = py{38,39,310,311,312},pypy{3.8,3.9} +envlist = py{38,39,310,311,312},pypy{3.8,3.9,3.10} [testenv] deps = -rrequirements-dev.txt diff --git a/tests/test_commands.py b/tests/test_commands.py index 835cd48..080689c 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -79,6 +79,14 @@ async def test_tesseract_parameters(): assert isinstance(parameters[0], Parameter) +@pytest.mark.xfail( + reason="The number of parameters in Tesseract can vary between releases." +) +async def test_len_tesseract_parameters(): + parameters = await aiopytesseract.tesseract_parameters() + assert len(parameters) == 627 + + @pytest.mark.parametrize( "func, timeout", [