"""Local-filesystem blob storage.""" from __future__ import annotations from pathlib import Path import pytest from ocr_sprint.storage.blob import LocalFsBlobStorage @pytest.fixture def storage(tmp_path: Path) -> LocalFsBlobStorage: return LocalFsBlobStorage(tmp_path / "blobs") def test_put_returns_dated_key(storage: LocalFsBlobStorage) -> None: key = storage.put(b"hello", original_filename="surat.pdf") # Layout is YYYY/MM/DD/.pdf parts = key.split("/") assert len(parts) == 4 assert parts[3].endswith(".pdf") assert storage.exists(key) assert storage.get(key) == b"hello" def test_put_unknown_extension_falls_back_to_bin(storage: LocalFsBlobStorage) -> None: key = storage.put(b"x", original_filename="weird.xyz") assert key.endswith(".bin") def test_put_strips_directory_traversal(storage: LocalFsBlobStorage) -> None: # ext is taken via Path().suffix, not from the raw filename, so a name # like "../../etc/passwd" is harmless — the only thing the caller can # influence is the extension. key = storage.put(b"y", original_filename="../../etc/passwd") assert "etc" not in key assert key.endswith(".bin") def test_put_handles_missing_filename(storage: LocalFsBlobStorage) -> None: key = storage.put(b"z", original_filename=None) assert key.endswith(".bin") def test_get_unknown_key_raises(storage: LocalFsBlobStorage) -> None: with pytest.raises(FileNotFoundError): storage.get("2026/01/01/bogus.pdf") def test_delete_is_idempotent(storage: LocalFsBlobStorage) -> None: key = storage.put(b"q", original_filename="x.png") storage.delete(key) assert not storage.exists(key) storage.delete(key) # second delete must not raise def test_resolve_rejects_path_escape(storage: LocalFsBlobStorage) -> None: with pytest.raises(ValueError, match="escapes storage root"): storage._resolve("../../../etc/passwd") def test_resolve_rejects_directory_prefix_collision(tmp_path: Path) -> None: """Regression: ``startswith`` would mis-accept sibling dirs whose names happen to begin with the storage root's basename. ``is_relative_to`` handles this correctly. """ root = tmp_path / "blobs" root.mkdir() sibling = tmp_path / "blobs_evil" sibling.mkdir() storage = LocalFsBlobStorage(root) with pytest.raises(ValueError, match="escapes storage root"): storage._resolve("../blobs_evil/secret.txt") def test_exists_returns_false_for_escaped_key(storage: LocalFsBlobStorage) -> None: # exists() must not raise even for malicious keys. assert storage.exists("../../etc/passwd") is False def test_open_streams_content(storage: LocalFsBlobStorage) -> None: key = storage.put(b"streamed", original_filename="x.png") with storage.open(key) as fh: assert fh.read() == b"streamed"