Skip to content

Commit

Permalink
perf(dictionary): refactor DictEntryIterator and do partial sort
Browse files Browse the repository at this point in the history
  • Loading branch information
lotem committed Feb 8, 2019
1 parent c7cb47e commit 0258c7f
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 62 deletions.
53 changes: 18 additions & 35 deletions src/rime/dict/dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,43 +53,26 @@ size_t match_extra_code(const table::Code* extra_code, size_t depth,

} // namespace dictionary

// Default constructor: starts with an empty chunk list (Base), no table,
// no cached entry and an entry count of zero.
// NOTE(review): in this diff rendering these lines look like the removed
// (pre-commit) side; the header hunk also shows `DictEntryIterator() = default;`.
DictEntryIterator::DictEntryIterator()
: Base(), table_(NULL), entry_(), entry_count_(0) {
}

// Copy constructor: copies the base chunk list together with the table
// pointer, the cached entry handle and the entry count of `other`.
// NOTE(review): appears to be the removed (pre-commit) side of the diff; the
// header hunk also declares a defaulted move constructor.
DictEntryIterator::DictEntryIterator(const DictEntryIterator& other)
: Base(other), table_(other.table_), entry_(other.entry_),
entry_count_(other.entry_count_) {
}

// Assignment from a non-const reference. Despite the operator's name this is
// not a plain copy: swap(other) exchanges contents with `other` (so `other`
// is modified), and then table_, entry_ and entry_count_ are copied from it.
// NOTE(review): presumably swap() is the inherited list<Chunk>::swap, given
// the `protected list<dictionary::Chunk>` base in the header hunk - confirm.
// NOTE(review): appears to be the removed (pre-commit) side of the diff; the
// header hunk also declares a defaulted move assignment.
DictEntryIterator& DictEntryIterator::operator= (DictEntryIterator& other) {
DLOG(INFO) << "swapping iterator contents.";
swap(other);
table_ = other.table_;
entry_ = other.entry_;
entry_count_ = other.entry_count_;
return *this;
}

// Returns true when empty() reports that no chunks are left.
// NOTE(review): appears to be the removed (pre-commit) side of the diff; the
// header hunk also shows an inline definition comparing chunk_index_ with
// chunks_.size().
bool DictEntryIterator::exhausted() const {
return empty();
}

// Appends `chunk` to the iterator's chunk list, adds the chunk's size to the
// running entry count and records `table` as the table used by this iterator.
//
// chunk: consumed by this call (moved into chunks_).
// table: stored as-is in table_; replaces any previously stored pointer.
//
// This function does not reorder chunks_.
void DictEntryIterator::AddChunk(dictionary::Chunk&& chunk, Table* table) {
  // Read the size before moving: after std::move(chunk) the argument is a
  // moved-from object and must not be inspected.
  const auto added_entries = chunk.size;
  chunks_.push_back(std::move(chunk));
  entry_count_ += added_entries;
  table_ = table;
}

// Moves the best remaining chunk, as ordered by
// dictionary::compare_chunk_by_head_element, to position chunk_index_.
// Chunks before chunk_index_ are left untouched; the chunks after it end up
// in an unspecified order, so Sort() must be called again whenever
// chunk_index_ advances or the current chunk's head element changes.
void DictEntryIterator::Sort() {
  // Nothing left to order. This also keeps the iterator arithmetic below from
  // stepping past chunks_.end() when the iterator is empty or exhausted.
  if (chunk_index_ >= chunks_.size()) {
    return;
  }
  // partial-sort remaining chunks, move best match to chunk_index_
  const auto current = chunks_.begin() + chunk_index_;
  std::partial_sort(current, current + 1, chunks_.end(),
                    dictionary::compare_chunk_by_head_element);
}

void DictEntryIterator::PrepareEntry() {
if (empty() || !table_) {
if (exhausted() || !table_) {
return;
}
const auto& chunk(front());
const auto& chunk(chunks_[chunk_index_]);
entry_ = New<DictEntry>();
const auto& e(chunk.entries[chunk.cursor]);
DLOG(INFO) << "creating temporary dict entry '"
Expand All @@ -105,7 +88,7 @@ void DictEntryIterator::PrepareEntry() {
}

an<DictEntry> DictEntryIterator::Peek() {
while (!entry_ && !empty()) {
while (!entry_ && !exhausted()) {
PrepareEntry();
if (filter_ && !filter_(entry_)) {
Next();
Expand All @@ -116,30 +99,30 @@ an<DictEntry> DictEntryIterator::Peek() {

bool DictEntryIterator::Next() {
entry_.reset();
if (empty()) {
if (exhausted()) {
return false;
}
auto& chunk(front());
auto& chunk(chunks_[chunk_index_]);
if (++chunk.cursor >= chunk.size) {
pop_front();
++chunk_index_;
}
else {
// reorder chunks since front() has got a new head element
// reorder chunks since the current chunk has got a new head element
Sort();
}
return !empty();
return !exhausted();
}

// Skips `num_entries` entries, consuming chunks one after another in their
// current order.
//
// Returns false if the iterator runs out of chunks while entries still remain
// to be skipped; returns true otherwise.
//
// NOTE(review): this hunk shows removed and added lines of the commit
// interleaved without +/- markers (empty()/front()/pop_front() appear to be
// the old side, exhausted()/chunks_[chunk_index_]/++chunk_index_ the new one).
// NOTE(review): the function neither resets entry_, nor calls Sort(), nor
// consults filter_ - confirm callers only use it on a freshly built iterator.
bool DictEntryIterator::Skip(size_t num_entries) {
while (num_entries > 0) {
if (empty()) return false;
auto& chunk(front());
if (exhausted()) return false;
auto& chunk(chunks_[chunk_index_]);
// enough entries left in this chunk: just move its cursor
if (chunk.cursor + num_entries < chunk.size) {
chunk.cursor += num_entries;
return true;
}
// consume the rest of this chunk and continue with the next one
num_entries -= (chunk.size - chunk.cursor);
pop_front();
++chunk_index_;
}
// Reached when num_entries is zero, including the case where the skip ended
// exactly at the end of the last chunk (the iterator is then exhausted).
return true;
}
Expand Down
21 changes: 10 additions & 11 deletions src/rime/dict/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,30 +39,29 @@ bool compare_chunk_by_leading_element(const Chunk& a, const Chunk& b);

} // namespace dictionary

// Iterator over dictionary entries that are stored in one or more chunks
// (dictionary::Chunk) added through AddChunk().
// NOTE(review): this hunk shows removed and added lines of the commit
// interleaved without +/- markers. The list-based declaration, the RIME_API
// constructors, the out-of-line exhausted() and the uninitialized members
// appear to be the old side; the vector-based members, defaulted special
// members and inline exhausted() appear to be the new side.
class DictEntryIterator : protected list<dictionary::Chunk>,
public DictEntryFilterBinder {
class DictEntryIterator : public DictEntryFilterBinder {
public:
using Base = list<dictionary::Chunk>;

RIME_API DictEntryIterator();
RIME_API DictEntryIterator(const DictEntryIterator& other);
DictEntryIterator& operator= (DictEntryIterator& other);
// Only move operations are declared on the new side, which makes the type
// move-only; call sites in this commit pass iterators with std::move.
DictEntryIterator() = default;
DictEntryIterator(DictEntryIterator&& other) = default;
DictEntryIterator& operator= (DictEntryIterator&& other) = default;

// Appends a chunk and records the table it belongs to.
void AddChunk(dictionary::Chunk&& chunk, Table* table);
// Moves the best remaining chunk to the current position.
void Sort();
// Returns the current entry, building it on demand.
RIME_API an<DictEntry> Peek();
// Advances by one entry; false once no entries remain.
RIME_API bool Next();
// Skips num_entries entries; false if the iterator runs out first.
bool Skip(size_t num_entries);
RIME_API bool exhausted() const;
// True when every chunk has been consumed.
bool exhausted() const { return chunk_index_ == chunks_.size(); }
// Sum of the sizes of all chunks added so far.
size_t entry_count() const { return entry_count_; }

protected:
// Builds entry_ from the current chunk's element at its cursor.
void PrepareEntry();

private:
Table* table_;
an<DictEntry> entry_;
size_t entry_count_;
// Chunks added via AddChunk(); chunks before chunk_index_ are used up.
vector<dictionary::Chunk> chunks_;
// Index of the chunk currently being read.
size_t chunk_index_ = 0;
// Table passed to the most recent AddChunk() call.
Table* table_ = nullptr;
// Cached current entry; cleared by Next().
an<DictEntry> entry_ = nullptr;
// Running total maintained by AddChunk().
size_t entry_count_ = 0;
};

struct DictEntryCollector : map<size_t, DictEntryIterator> {
Expand Down
10 changes: 6 additions & 4 deletions src/rime/gear/reverse_lookup_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ class ReverseLookupTranslation : public TableTranslation {
const string& input,
size_t start, size_t end,
const string& preedit,
const DictEntryIterator& iter,
DictEntryIterator&& iter,
bool quality)
: TableTranslation(options, NULL, input, start, end, preedit, iter),
: TableTranslation(
options, NULL, input, start, end, preedit, std::move(iter)),
dict_(dict), options_(options), quality_(quality) {
}
virtual an<Candidate> Peek();
Expand Down Expand Up @@ -185,7 +186,7 @@ an<Translation> ReverseLookupTranslator::Query(const string& input,
auto collector = dict_->Lookup(graph, 0);
if (collector && !collector->empty() &&
collector->rbegin()->first == consumed) {
iter = collector->rbegin()->second;
iter = std::move(collector->rbegin()->second);
quality = !graph.vertices.empty() &&
(graph.vertices.rbegin()->second == kNormalSpelling);
}
Expand All @@ -199,7 +200,8 @@ an<Translation> ReverseLookupTranslator::Query(const string& input,
segment.start,
segment.end,
preedit,
iter, quality);
std::move(iter),
quality);
}
return nullptr;
}
Expand Down
14 changes: 7 additions & 7 deletions src/rime/gear/table_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ TableTranslation::TableTranslation(TranslatorOptions* options,
size_t start,
size_t end,
const string& preedit,
const DictEntryIterator& iter,
const UserDictEntryIterator& uter)
DictEntryIterator&& iter,
UserDictEntryIterator&& uter)
: options_(options), language_(language),
input_(input), start_(start), end_(end), preedit_(preedit),
iter_(iter), uter_(uter) {
iter_(std::move(iter)), uter_(std::move(uter)) {
if (options_)
options_->preedit_formatter().Apply(&preedit_);
CheckEmpty();
Expand Down Expand Up @@ -190,7 +190,7 @@ bool LazyTableTranslation::FetchMoreTableEntries() {
}
if (more.entry_count() > previous_entry_count) {
more.Skip(previous_entry_count);
iter_ = more;
iter_ = std::move(more);
}
return true;
}
Expand Down Expand Up @@ -276,8 +276,8 @@ an<Translation> TableTranslator::Query(const string& input,
segment.start,
segment.start + input.length(),
preedit,
iter,
uter);
std::move(iter),
std::move(uter));
}
if (translation) {
bool filter_by_charset = enable_charset_filter_ &&
Expand Down Expand Up @@ -618,7 +618,7 @@ TableTranslator::MakeSentence(const string& input, size_t start,
entries[consumed_length] = iter.Peek();
if (start_pos == 0 && !iter.exhausted()) {
// also provide words for manual composition
collector[consumed_length] = iter;
collector[consumed_length] = std::move(iter);
DLOG(INFO) << "table[" << consumed_length << "]: "
<< collector[consumed_length].entry_count();
}
Expand Down
4 changes: 2 additions & 2 deletions src/rime/gear/table_translator.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class TableTranslation : public Translation {
size_t start,
size_t end,
const string& preedit,
const DictEntryIterator& iter = DictEntryIterator(),
const UserDictEntryIterator& uter = UserDictEntryIterator());
DictEntryIterator&& iter = {},
UserDictEntryIterator&& uter = {});

virtual bool Next();
virtual an<Candidate> Peek();
Expand Down
6 changes: 3 additions & 3 deletions test/dictionary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ TEST_F(RimeDictionaryTest, ScriptLookup) {
ASSERT_TRUE(bool(c));

ASSERT_TRUE(c->find(3) != c->end());
rime::DictEntryIterator d3((*c)[3]);
rime::DictEntryIterator& d3((*c)[3]);
EXPECT_FALSE(d3.exhausted());
auto e1 = d3.Peek();
ASSERT_TRUE(bool(e1));
Expand All @@ -87,14 +87,14 @@ TEST_F(RimeDictionaryTest, ScriptLookup) {
EXPECT_TRUE(d3.Next());

ASSERT_TRUE(c->find(5) != c->end());
rime::DictEntryIterator d5((*c)[5]);
rime::DictEntryIterator& d5((*c)[5]);
EXPECT_FALSE(d5.exhausted());
auto e2 = d5.Peek();
ASSERT_TRUE(bool(e2));
EXPECT_EQ(2, e2->code.size());

ASSERT_TRUE(c->find(7) != c->end());
rime::DictEntryIterator d7((*c)[7]);
rime::DictEntryIterator& d7((*c)[7]);
EXPECT_FALSE(d7.exhausted());
auto e3 = d7.Peek();
ASSERT_TRUE(bool(e3));
Expand Down

0 comments on commit 0258c7f

Please sign in to comment.