From 0258c7fe0d34141f412b7d010b6624ce30cb94f3 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Fri, 8 Feb 2019 15:46:13 +0800 Subject: [PATCH] perf(dictionary): refactor DictEntryIterator and do partial sort --- src/rime/dict/dictionary.cc | 53 ++++++++-------------- src/rime/dict/dictionary.h | 21 ++++----- src/rime/gear/reverse_lookup_translator.cc | 10 ++-- src/rime/gear/table_translator.cc | 14 +++--- src/rime/gear/table_translator.h | 4 +- test/dictionary_test.cc | 6 +-- 6 files changed, 46 insertions(+), 62 deletions(-) diff --git a/src/rime/dict/dictionary.cc b/src/rime/dict/dictionary.cc index b0fa9682a..4cef15147 100644 --- a/src/rime/dict/dictionary.cc +++ b/src/rime/dict/dictionary.cc @@ -53,43 +53,26 @@ size_t match_extra_code(const table::Code* extra_code, size_t depth, } // namespace dictionary -DictEntryIterator::DictEntryIterator() - : Base(), table_(NULL), entry_(), entry_count_(0) { -} - -DictEntryIterator::DictEntryIterator(const DictEntryIterator& other) - : Base(other), table_(other.table_), entry_(other.entry_), - entry_count_(other.entry_count_) { -} - -DictEntryIterator& DictEntryIterator::operator= (DictEntryIterator& other) { - DLOG(INFO) << "swapping iterator contents."; - swap(other); - table_ = other.table_; - entry_ = other.entry_; - entry_count_ = other.entry_count_; - return *this; -} - -bool DictEntryIterator::exhausted() const { - return empty(); -} - void DictEntryIterator::AddChunk(dictionary::Chunk&& chunk, Table* table) { - push_back(std::move(chunk)); + chunks_.push_back(std::move(chunk)); entry_count_ += chunk.size; table_ = table; } void DictEntryIterator::Sort() { - sort(dictionary::compare_chunk_by_head_element); + // partial-sort remaining chunks, move best match to chunk_index_ + std::partial_sort( + chunks_.begin() + chunk_index_, + chunks_.begin() + chunk_index_ + 1, + chunks_.end(), + dictionary::compare_chunk_by_head_element); } void DictEntryIterator::PrepareEntry() { - if (empty() || !table_) { + if (exhausted() || !table_) { return; } - const auto& chunk(front()); + const auto& chunk(chunks_[chunk_index_]); entry_ = New(); const auto& e(chunk.entries[chunk.cursor]); DLOG(INFO) << "creating temporary dict entry '" @@ -105,7 +88,7 @@ void DictEntryIterator::PrepareEntry() { } an DictEntryIterator::Peek() { - while (!entry_ && !empty()) { + while (!entry_ && !exhausted()) { PrepareEntry(); if (filter_ && !filter_(entry_)) { Next(); @@ -116,30 +99,30 @@ an DictEntryIterator::Peek() { bool DictEntryIterator::Next() { entry_.reset(); - if (empty()) { + if (exhausted()) { return false; } - auto& chunk(front()); + auto& chunk(chunks_[chunk_index_]); if (++chunk.cursor >= chunk.size) { - pop_front(); + ++chunk_index_; } else { - // reorder chunks since front() has got a new head element + // reorder chunks since the current chunk has got a new head element Sort(); } - return !empty(); + return !exhausted(); } bool DictEntryIterator::Skip(size_t num_entries) { while (num_entries > 0) { - if (empty()) return false; - auto& chunk(front()); + if (exhausted()) return false; + auto& chunk(chunks_[chunk_index_]); if (chunk.cursor + num_entries < chunk.size) { chunk.cursor += num_entries; return true; } num_entries -= (chunk.size - chunk.cursor); - pop_front(); + ++chunk_index_; } return true; } diff --git a/src/rime/dict/dictionary.h b/src/rime/dict/dictionary.h index f1aaaedb5..2b8427878 100644 --- a/src/rime/dict/dictionary.h +++ b/src/rime/dict/dictionary.h @@ -39,30 +39,29 @@ bool compare_chunk_by_leading_element(const Chunk& a, const Chunk& b); } // namespace dictionary -class DictEntryIterator : protected list, - public DictEntryFilterBinder { +class DictEntryIterator : public DictEntryFilterBinder { public: - using Base = list; - - RIME_API DictEntryIterator(); - RIME_API DictEntryIterator(const DictEntryIterator& other); - DictEntryIterator& operator= (DictEntryIterator& other); + DictEntryIterator() = default; + DictEntryIterator(DictEntryIterator&& other) = default; + DictEntryIterator& operator= (DictEntryIterator&& other) = default; void AddChunk(dictionary::Chunk&& chunk, Table* table); void Sort(); RIME_API an Peek(); RIME_API bool Next(); bool Skip(size_t num_entries); - RIME_API bool exhausted() const; + bool exhausted() const { return chunk_index_ == chunks_.size(); } size_t entry_count() const { return entry_count_; } protected: void PrepareEntry(); private: - Table* table_; - an entry_; - size_t entry_count_; + vector chunks_; + size_t chunk_index_ = 0; + Table* table_ = nullptr; + an entry_ = nullptr; + size_t entry_count_ = 0; }; struct DictEntryCollector : map { diff --git a/src/rime/gear/reverse_lookup_translator.cc b/src/rime/gear/reverse_lookup_translator.cc index 98b26d9b3..dcdf5bb4c 100644 --- a/src/rime/gear/reverse_lookup_translator.cc +++ b/src/rime/gear/reverse_lookup_translator.cc @@ -32,9 +32,10 @@ class ReverseLookupTranslation : public TableTranslation { const string& input, size_t start, size_t end, const string& preedit, - const DictEntryIterator& iter, + DictEntryIterator&& iter, bool quality) - : TableTranslation(options, NULL, input, start, end, preedit, iter), + : TableTranslation( + options, NULL, input, start, end, preedit, std::move(iter)), dict_(dict), options_(options), quality_(quality) { } virtual an Peek(); @@ -185,7 +186,7 @@ an ReverseLookupTranslator::Query(const string& input, auto collector = dict_->Lookup(graph, 0); if (collector && !collector->empty() && collector->rbegin()->first == consumed) { - iter = collector->rbegin()->second; + iter = std::move(collector->rbegin()->second); quality = !graph.vertices.empty() && (graph.vertices.rbegin()->second == kNormalSpelling); } @@ -199,7 +200,8 @@ an ReverseLookupTranslator::Query(const string& input, segment.start, segment.end, preedit, - iter, quality); + std::move(iter), + quality); } return nullptr; } diff --git a/src/rime/gear/table_translator.cc b/src/rime/gear/table_translator.cc index 86a83b5ea..0a08749cd 100644 --- a/src/rime/gear/table_translator.cc +++ b/src/rime/gear/table_translator.cc @@ -33,11 +33,11 @@ TableTranslation::TableTranslation(TranslatorOptions* options, size_t start, size_t end, const string& preedit, - const DictEntryIterator& iter, - const UserDictEntryIterator& uter) + DictEntryIterator&& iter, + UserDictEntryIterator&& uter) : options_(options), language_(language), input_(input), start_(start), end_(end), preedit_(preedit), - iter_(iter), uter_(uter) { + iter_(std::move(iter)), uter_(std::move(uter)) { if (options_) options_->preedit_formatter().Apply(&preedit_); CheckEmpty(); @@ -190,7 +190,7 @@ bool LazyTableTranslation::FetchMoreTableEntries() { } if (more.entry_count() > previous_entry_count) { more.Skip(previous_entry_count); - iter_ = more; + iter_ = std::move(more); } return true; } @@ -276,8 +276,8 @@ an TableTranslator::Query(const string& input, segment.start, segment.start + input.length(), preedit, - iter, - uter); + std::move(iter), + std::move(uter)); } if (translation) { bool filter_by_charset = enable_charset_filter_ && @@ -618,7 +618,7 @@ TableTranslator::MakeSentence(const string& input, size_t start, entries[consumed_length] = iter.Peek(); if (start_pos == 0 && !iter.exhausted()) { // also provide words for manual composition - collector[consumed_length] = iter; + collector[consumed_length] = std::move(iter); DLOG(INFO) << "table[" << consumed_length << "]: " << collector[consumed_length].entry_count(); } diff --git a/src/rime/gear/table_translator.h b/src/rime/gear/table_translator.h index cd4b01dbe..2fa1b54cb 100644 --- a/src/rime/gear/table_translator.h +++ b/src/rime/gear/table_translator.h @@ -56,8 +56,8 @@ class TableTranslation : public Translation { size_t start, size_t end, const string& preedit, - const DictEntryIterator& iter = DictEntryIterator(), - const UserDictEntryIterator& uter = UserDictEntryIterator()); + DictEntryIterator&& iter = {}, + UserDictEntryIterator&& uter = {}); virtual bool Next(); virtual an Peek(); diff --git a/test/dictionary_test.cc b/test/dictionary_test.cc index 615c5c849..9b25b0d68 100644 --- a/test/dictionary_test.cc +++ b/test/dictionary_test.cc @@ -78,7 +78,7 @@ TEST_F(RimeDictionaryTest, ScriptLookup) { ASSERT_TRUE(bool(c)); ASSERT_TRUE(c->find(3) != c->end()); - rime::DictEntryIterator d3((*c)[3]); + rime::DictEntryIterator& d3((*c)[3]); EXPECT_FALSE(d3.exhausted()); auto e1 = d3.Peek(); ASSERT_TRUE(bool(e1)); @@ -87,14 +87,14 @@ TEST_F(RimeDictionaryTest, ScriptLookup) { EXPECT_TRUE(d3.Next()); ASSERT_TRUE(c->find(5) != c->end()); - rime::DictEntryIterator d5((*c)[5]); + rime::DictEntryIterator& d5((*c)[5]); EXPECT_FALSE(d5.exhausted()); auto e2 = d5.Peek(); ASSERT_TRUE(bool(e2)); EXPECT_EQ(2, e2->code.size()); ASSERT_TRUE(c->find(7) != c->end()); - rime::DictEntryIterator d7((*c)[7]); + rime::DictEntryIterator& d7((*c)[7]); EXPECT_FALSE(d7.exhausted()); auto e3 = d7.Peek(); ASSERT_TRUE(bool(e3));