From a51a51474984be273bf7678d436d8bfecb19f988 Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Wed, 3 Jul 2024 23:16:09 -0700 Subject: [PATCH] improve search index for incremental builds add test --- sphinx_immaterial/search.py | 40 ++++++++++++++++++++++++------------ tests/search_indexer_test.py | 38 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/sphinx_immaterial/search.py b/sphinx_immaterial/search.py index f8aff65f..7de54ea2 100644 --- a/sphinx_immaterial/search.py +++ b/sphinx_immaterial/search.py @@ -1,8 +1,9 @@ """Generates a search index for use by the lunr.js-based mkdocs-material search.""" import multiprocessing +import multiprocessing.managers import pathlib -from typing import Dict, Any, cast +from typing import Dict, Any, cast, List, Optional import docutils.nodes import jinja2.sandbox @@ -16,6 +17,24 @@ _SEARCH_QUEUE_KEY = "_sphinx_immaterial_search_entry_queue" _SEARCH_CONFIG = "_sphinx_immaterial_search_config" +_SEARCH_QUEUE_MGR_KEY = "_sphinx_immaterial_search_multiprocessing_manager" + + +def _init_search_index_queue(app: sphinx.application.Sphinx): + manager: multiprocessing.managers.SyncManager = getattr(app, _SEARCH_QUEUE_MGR_KEY) + prev_queue = cast( + Optional[multiprocessing.managers.DictProxy], + getattr(app.env, _SEARCH_QUEUE_KEY, None), + ) + if prev_queue is not None: + queue = manager.dict(**prev_queue) + else: + queue = manager.dict() + setattr( + app.env, + _SEARCH_QUEUE_KEY, + queue, + ) def _get_search_config(app: sphinx.application.Sphinx): @@ -91,8 +110,9 @@ def _html_page_context( toc=page_ctx["toc"], ) ) - queue = getattr(app, _SEARCH_QUEUE_KEY) - queue.append(indexer.entries) + + queue: Dict[str, List[_Page]] = getattr(app.env, _SEARCH_QUEUE_KEY) + queue[pagename] = indexer.entries def _build_finished(app: sphinx.application.Sphinx, exc) -> None: @@ -104,16 +124,11 @@ def _build_finished(app: sphinx.application.Sphinx, exc) -> None: if not 
isinstance(app.builder, sphinx.builders.html.StandaloneHTMLBuilder): return - queue = getattr(app, _SEARCH_QUEUE_KEY) + queue: Dict[str, List[_Page]] = getattr(app.env, _SEARCH_QUEUE_KEY) indexer = _make_indexer(app) - for entries in queue[:]: + for entries in queue.values(): indexer.entries.extend(entries) output_path = pathlib.Path(app.outdir) / "search" / "search_index.json" - # try: - # existing_data = output_path.read_text(encoding='utf-8') - - # except FileNotFoundError: - # pass json_data = indexer.generate_search_index(prev=None) output_path.parent.mkdir(exist_ok=True) @@ -124,9 +139,8 @@ def _build_finished(app: sphinx.application.Sphinx, exc) -> None: def setup(app: sphinx.application.Sphinx): app.connect("html-page-context", _html_page_context) app.connect("build-finished", _build_finished) - manager = multiprocessing.Manager() - setattr(app, "_sphinx_immaterial_search_multiprocessing_manager", manager) - setattr(app, _SEARCH_QUEUE_KEY, manager.list()) + app.connect("builder-inited", _init_search_index_queue) + setattr(app, _SEARCH_QUEUE_MGR_KEY, multiprocessing.Manager()) return { "parallel_read_safe": True, "parallel_write_safe": True, diff --git a/tests/search_indexer_test.py b/tests/search_indexer_test.py index 9939f93b..ce719f3b 100644 --- a/tests/search_indexer_test.py +++ b/tests/search_indexer_test.py @@ -38,3 +38,41 @@ def test_search_metadata(tmp_path: Path, immaterial_make_app): assert "docs" in index_json assert len(index_json["docs"]) == 1 assert index_json["docs"][0]["boost"] == "2" + + +def test_search_incremental_builds(tmp_path: Path, immaterial_make_app): + """Verify that an incremental build updates the search index entries for changed files.""" + + def verify_search_index(index_out: Path): + index_json = json.loads(index_out.read_bytes()) + assert "docs" in index_json + assert len(index_json["docs"]) == 2 + locations = [e["location"] for e in index_json["docs"]] + assert "index.html" in locations + assert "and_more.html" in locations + return 
index_json + app: SphinxTestApp = immaterial_make_app(files=FILES) + app.build() + index_out = tmp_path / "_build" / "html" / "search" / "search_index.json" + assert index_out.exists() + first_index = verify_search_index(index_out) + + # change the contents of the and_more.rst file + changed_file = tmp_path / "and_more.rst" + assert changed_file.exists() + changed_file.write_bytes(changed_file.read_bytes() + b"\nSome new content\n") + assert "and_more" in list(app.builder.get_outdated_docs()) + app.build() + + # verify expected changes on second build + new_index = verify_search_index(index_out) + assert new_index != first_index + for e in new_index["docs"]: + if e["location"] == "and_more.html": + changed_text = e["text"] + break + else: # pragma: no cover + # Should never be reached; guards against future changes breaking this test's fixtures. + raise RuntimeError("search index entry for changed file not found") + assert "Some new content" in changed_text