feat: implement PP-Structure table extraction pipeline with GPU runtime configuration support

This commit is contained in:
Adriankf59
2026-04-27 00:51:23 +07:00
parent 9d969e61fd
commit 6d793758ff
12 changed files with 896 additions and 31 deletions

View File

@@ -2,8 +2,12 @@
from __future__ import annotations
import sys
from types import ModuleType, SimpleNamespace
import pytest
from ocr_sprint.pipeline import table as table_module
from ocr_sprint.pipeline.table import (
DetectedTable,
extract_tables_from_pp_result,
@@ -82,6 +86,34 @@ class TestDetectedTable:
assert table.n_cols == 0
class TestPpStructureInit:
    """Tests for how ``table_module._build_pp_structure`` builds its engine."""

    def test_gpu_init_falls_back_to_cpu(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Expect a failed GPU construction to be retried with ``use_gpu=False``.

        A stub ``paddleocr`` module is placed in ``sys.modules`` and
        ``get_settings`` is patched to request GPU mode with
        ``ocr_lang="latin"``. The builder must return an instance of the stub
        engine after exactly two constructor calls: GPU first, then CPU, both
        with ``lang="en"``.
        """
        recorded_kwargs: list[dict[str, object]] = []

        class FakePPStructure:
            """Stand-in for ``paddleocr.PPStructure`` that rejects GPU mode."""

            def __init__(self, **kwargs: object) -> None:
                # Record before raising so the failed GPU attempt stays visible
                # to the assertion on the call history.
                recorded_kwargs.append(kwargs)
                if kwargs["use_gpu"]:
                    raise RuntimeError("gpu init failed")

        def fake_get_settings() -> SimpleNamespace:
            """Return settings that ask for the GPU and the ``latin`` language."""
            return SimpleNamespace(ocr_lang="latin", ocr_use_gpu=True)

        stub_module = ModuleType("paddleocr")
        stub_module.PPStructure = FakePPStructure
        monkeypatch.setitem(sys.modules, "paddleocr", stub_module)
        monkeypatch.setattr(table_module, "get_settings", fake_get_settings)

        engine = table_module._build_pp_structure()

        assert isinstance(engine, FakePPStructure)
        # Same keyword arguments on both attempts; only ``use_gpu`` flips.
        expected_calls = [
            {"lang": "en", "use_gpu": gpu_flag, "layout": True, "show_log": False}
            for gpu_flag in (True, False)
        ]
        assert recorded_kwargs == expected_calls
@pytest.fixture
def sample_personnel_table() -> DetectedTable:
"""Header + three personnel rows in a typical Polres-level format."""