Skip to content

Commit

Permalink
add tokenizer.is_fast warning on creation
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Feb 21, 2024
1 parent f716851 commit 992b1a0
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
3 changes: 0 additions & 3 deletions serve/mlc_serve/engine/engine_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,6 @@ def detokenize_incrementally(
prefix_begin_offset = generation_sequence.prefix_begin_offset
prefix_end_offset = generation_sequence.prefix_end_offset

# Some models, such as Qwen, use a tokenizer from the tiktoken package, which fails this assert.
# assert tokenizer.is_fast

prefix_text = tokenizer.convert_tokens_to_string(
output_tokens[prefix_begin_offset:prefix_end_offset]
)
Expand Down
8 changes: 8 additions & 0 deletions serve/mlc_serve/model/tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import structlog
from typing import List
from transformers import AutoTokenizer
from ..engine import ChatMessage
from pathlib import Path

LOG = structlog.stdlib.get_logger(__name__)


class Tokenizer:
def __init__(self, hf_tokenizer, skip_special_tokens=True):
Expand Down Expand Up @@ -55,3 +58,8 @@ def __init__(self, tokenizer_path: Path):
)
self.tokenizer = Tokenizer(hf_tokenizer)
self.conversation_template = ConversationTemplate(hf_tokenizer)

if not self.tokenizer.is_fast:
LOG.warn("tokenizer.is_fast is false. Some models using an external tokenizer package, "
"such as QWen, might hit this condition but that does not imply that their "
"tokenizers are slow.")

0 comments on commit 992b1a0

Please sign in to comment.