Skip to content

Commit

Permalink
Cache all query carousels (#9807)
Browse files Browse the repository at this point in the history
* Cache all QueryCarousels by shortening cache key prefix by md5 hashing `args` and `kwargs` for cache keys
* Include name of cached macro in cache key
* Option to prevent caching select carousels

Avoids caching the book page related works carousel, which is fetched
after the page loads.  Also prevents the homepage "Classic Books"
carousel from being cached, as the homepage itself is cached for the
same amount of time.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mek <michael.karpeles@gmail.com>
  • Loading branch information
3 people authored Sep 20, 2024
1 parent ae33dd7 commit d3bb158
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 47 deletions.
16 changes: 10 additions & 6 deletions openlibrary/core/cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Caching utilities.
"""

import hashlib
import random
import string
import time
Expand Down Expand Up @@ -59,6 +60,7 @@ def __init__(
key_prefix: str | None = None,
timeout: int = MINUTE_SECS,
prethread: Callable | None = None,
hash_args: bool = False,
):
"""Creates a new memoized function for ``f``."""
self.f = f
Expand All @@ -70,6 +72,7 @@ def __init__(
self.stats = web.storage(calls=0, hits=0, updates=0, async_updates=0)
self.active_threads: dict = {}
self.prethread = prethread
self.hash_args = hash_args

def _get_memcache(self):
if self._memcache is None:
Expand Down Expand Up @@ -177,20 +180,21 @@ def join_threads(self):
thread.join()

def encode_args(self, args, kw=None):
"""Encodes arguments to construct the memcache key."""
kw = kw or {}
"""Encodes arguments to construct the memcache key.
"""

# strip [ and ] from key
a = self.json_encode(list(args))[1:-1]

if kw:
return a + "-" + self.json_encode(kw)
else:
return a
a = a + "-" + self.json_encode(kw)
if self.hash_args:
return f"{hashlib.md5(a.encode('utf-8')).hexdigest()}"
return a

def compute_key(self, args, kw):
"""Computes memcache key for storing result of function call with given arguments."""
key = self.key_prefix + "-" + self.encode_args(args, kw)
key = self.key_prefix + "$" + self.encode_args(args, kw)
return key.replace(
" ", "_"
) # XXX: temporary fix to handle spaces in the arguments
Expand Down
2 changes: 1 addition & 1 deletion openlibrary/i18n/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -7457,7 +7457,7 @@ msgstr ""
msgid "This graph charts editions published on this subject."
msgstr ""

#: QueryCarousel.html
#: RawQueryCarousel.html
msgid "Search collection"
msgstr ""

Expand Down
29 changes: 5 additions & 24 deletions openlibrary/macros/QueryCarousel.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$def with(query, title=None, sort='new', key='', limit=20, search=False, has_fulltext_only=True, url=None, layout='carousel')
$def with(query, title=None, sort='new', key='', limit=20, search=False, has_fulltext_only=True, url=None, layout='carousel', use_cache=True)

$# Takes following parameters
$# * query (str) -- Any arbitrary Open Library search query, e.g. subject:"Textbooks"
Expand All @@ -9,26 +9,7 @@
$# * search (bool) -- whether to include search within collection
$# * layout (str) -- layout type, default 'carousel', currently also supports 'grid'

$# Enable search within this query
$if search:
<form action="/search" class="olform pagesearchbox">
<input type="hidden" name="q" value="$query"/>
$if has_fulltext_only:
<input type="hidden" name="has_fulltext" value="true"/>
<input type="text" placeholder="$_('Search collection')" name="q2"/>
<input type="submit"/>
</form>

$code:
# Limit to just fields needed to render carousels
params = { 'q': query, 'fields': 'key,title,subtitle,author_name,cover_i,ia,availability,id_project_gutenberg,id_librivox,id_standard_ebooks,id_openstax' }
# Don't need fields in the search UI url, since they don't do anything there
url = url or "/search?" + urlencode({'q': query})
if has_fulltext_only:
params['has_fulltext'] = 'true'

results = work_search(params, sort=sort, limit=limit, facet=False)
books = [storage(b) for b in (results.get('docs', []))]
load_more = {"url": "/search.json?" + urlencode(params), "limit": limit }

$:render_template("books/custom_carousel", books=books, title=title, url=url, key=key, load_more=load_more, layout=layout)
$if use_cache:
$:macros.CacheableMacro("RawQueryCarousel", query, title=title, sort=sort, key=key, limit=limit, search=search, has_fulltext_only=has_fulltext_only, url=url, layout=layout)
$else:
$:macros.RawQueryCarousel(query, title=title, sort=sort, key=key, limit=limit, search=search, has_fulltext_only=has_fulltext_only, url=url, layout=layout)
34 changes: 34 additions & 0 deletions openlibrary/macros/RawQueryCarousel.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
$def with(query, title=None, sort='new', key='', limit=20, search=False, has_fulltext_only=True, url=None, layout='carousel')

$# Takes following parameters
$# * query (str) -- Any arbitrary Open Library search query, e.g. subject:"Textbooks"
$# * title (str) -- A title to show above the carousel (links to /search?q=query)
$# * sort (str) -- optional sort param defined within work_search.py `work_search`
$# * key (str) -- unique name of the carousel in analytics
$# * limit (int) -- initial number of books to pull
$# * search (bool) -- whether to include search within collection
$# * layout (str) -- layout type, default 'carousel', currently also supports 'grid'

$# Enable search within this query
$if search:
<form action="/search" class="olform pagesearchbox">
<input type="hidden" name="q" value="$query"/>
$if has_fulltext_only:
<input type="hidden" name="has_fulltext" value="true"/>
<input type="text" placeholder="$_('Search collection')" name="q2"/>
<input type="submit"/>
</form>

$code:
# Limit to just fields needed to render carousels
params = { 'q': query, 'fields': 'key,title,subtitle,author_name,cover_i,ia,availability,id_project_gutenberg,id_librivox,id_standard_ebooks,id_openstax' }
# Don't need fields in the search UI url, since they don't do anything there
url = url or "/search?" + urlencode({'q': query})
if has_fulltext_only:
params['has_fulltext'] = 'true'

results = work_search(params, sort=sort, limit=limit, facet=False)
books = [storage(b) for b in (results.get('docs', []))]
load_more = {"url": "/search.json?" + urlencode(params), "limit": limit }

$:render_template("books/custom_carousel", books=books, title=title, url=url, key=key, load_more=load_more, layout=layout)
27 changes: 14 additions & 13 deletions openlibrary/plugins/upstream/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,25 +218,26 @@ def render_macro(name, args, **kwargs):


@public
def render_cached_macro(name, args: tuple, **kwargs):
def render_cached_macro(name: str, args: tuple, **kwargs):
from openlibrary.plugins.openlibrary.home import caching_prethread

def get_key():
def get_key_prefix():
lang = web.ctx.lang
pd = web.cookies().get('pd', False)
key = f'{name}.{lang}'
if pd:
key += '.pd'
for arg in args:
key += f'.{arg}'
for k, v in kwargs.items():
key += f'.{k}:{v}'
return key
key_prefix = f'{name}.{lang}'
if web.cookies().get('pd', False):
key_prefix += '.pd'
if web.cookies().get('sfw', ''):
key_prefix += '.sfw'
return key_prefix

five_minutes = 5 * 60
key = get_key()
key_prefix = get_key_prefix()
mc = cache.memcache_memoize(
render_macro, key, timeout=five_minutes, prethread=caching_prethread()
render_macro,
key_prefix=key_prefix,
timeout=five_minutes,
prethread=caching_prethread(),
hash_args=True, # this avoids cache key length overflow
)

try:
Expand Down
4 changes: 2 additions & 2 deletions openlibrary/templates/books/RelatedWorksCarousel.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
$ )
$if author_keys:
$ query += ' -author_key:(%s)' % ' OR '.join(['%s' % a for a in author_keys])
$:macros.QueryCarousel(query=query, title=_("You might also like"), key="related-subjects-carousel", limit=12)
$:macros.QueryCarousel(query=query, title=_("You might also like"), key="related-subjects-carousel", limit=12, use_cache=False)
$if author_keys:
$ query = 'ebook_access:[borrowable TO *] -key:"%s" author_key:(%s)' % (
$ work.key,
$ ' OR '.join(['%s' % a for a in author_keys]),
$ )
$ title = ungettext("More by this author", "More by these authors", len(author_keys))
$:macros.QueryCarousel(query=query, title=title, key="related-authors-carousel", limit=12)
$:macros.QueryCarousel(query=query, title=title, key="related-authors-carousel", limit=12, use_cache=False)
</div>
2 changes: 1 addition & 1 deletion openlibrary/templates/home/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
$if not test:
$:render_template("books/custom_carousel", key="trending", books=get_trending_books(books_only=True, since_days=0, since_hours=24, minimum=3, sort_by_count=False), title=_('Trending Books'), url="/trending/daily", test=test, load_more={"url": "/trending/hours.json?hours=24&minimum=3&sort_by_count=false", "mode": "page", "limit": 18})

$:macros.QueryCarousel(query="ddc:8* first_publish_year:[* TO 1950] publish_year:[2000 TO *] NOT public_scan_b:false", title=_('Classic Books'), key="public_domain", url="/read", sort='random.hourly')
$:macros.QueryCarousel(query="ddc:8* first_publish_year:[* TO 1950] publish_year:[2000 TO *] NOT public_scan_b:false", title=_('Classic Books'), key="public_domain", url="/read", sort='random.hourly', use_cache=False)

$:render_template("home/custom_ia_carousel", title=_('Books We Love'), key="staff_picks", query='languageSorter:("English")', subject="openlibrary_staff_picks", sorts=["lending___last_browse desc"], limit=18, test=test)

Expand Down

0 comments on commit d3bb158

Please sign in to comment.