Skip to content

Commit

Permalink
fix(octagram): buffer overflow when querying long words
Browse files Browse the repository at this point in the history
  • Loading branch information
lotem committed Feb 10, 2021
1 parent f92e083 commit ef5af4f
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 9 deletions.
6 changes: 3 additions & 3 deletions src/gram_encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
namespace rime {
namespace grammar {

string encode(const char* p) {
char encoded_str[32];
string encode(const char* begin, const char* end) {
char encoded_str[kMaxEncodedUnicode * 4];
char* e = encoded_str;
while (*p) {
for (auto p = begin; p < end; ) {
uint32_t u = utf8::unchecked::next(p);
if (u < 0x80) {
if (u == 0) {
Expand Down
6 changes: 4 additions & 2 deletions src/gram_encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
namespace rime {
namespace grammar {

string encode(const char* p);
constexpr int kMaxEncodedUnicode = 8;

string encode(const char* begin, const char* end);
// Convenience overload for a whole string: hands the bytes of `utf8_str`
// to the pointer-based encode() and returns its result.
// NOTE(review): two return statements appear back to back below, so only the
// first one can ever execute. This looks like the removed and the added line
// of a diff shown together -- confirm against the real header which one is
// current.
inline string encode(const string& utf8_str) {
return encode(utf8_str.c_str());
return encode(utf8_str.c_str(), utf8_str.c_str() + utf8_str.length());
}
const char* next_unicode(const char* p);
size_t unicode_length(const string& encoded, size_t length);
Expand Down
26 changes: 22 additions & 4 deletions src/octagram.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "gram_db.h"
#include "gram_encoding.h"
#include "octagram.h"
#include <algorithm>
#include <rime/config.h>
#include <rime/resource.h>
#include <rime/service.h>
Expand Down Expand Up @@ -87,6 +88,19 @@ inline static const char* last_n_unicode(const string& str,
return p;
}

// Steps over at most `max` leading code points of `str`.
//
//   str        UTF-8 text to scan. It is expected to be well-formed:
//              utf8::unchecked::next performs no validation and no bounds
//              checking.
//   max        upper bound on the number of code points to step over.
//   out_count  set to the number of code points actually stepped over
//              (0 when the loop body never runs, e.g. `max` <= 0).
//
// Returns a pointer just past the last code point stepped over. The result
// never exceeds str_end(str), so the caller can use it as the end of a range
// that starts at str_begin(str).
inline static const char* first_n_unicode(const string& str,
                                          int max,
                                          int& out_count) {
  const char* p = str_begin(str);
  const char* end = str_end(str);
  out_count = 0;
  // Use `<` rather than `!=`: if the text ends in a truncated multi-byte
  // sequence, the unchecked decoder can advance `p` beyond `end`, and an
  // inequality test would then keep walking through unrelated memory.
  while (p < end && out_count < max) {
    utf8::unchecked::next(p);
    ++out_count;
  }
  // Clamp so an overshoot on malformed input is never handed to the caller.
  return p < end ? p : end;
}

inline static bool matches_whole_query(const char* context_ptr,
const string& context_query,
size_t match_length,
Expand All @@ -103,12 +117,16 @@ double Octagram::Query(const string& context,
}
double result = config_->non_collocation_penalty;
GramDb::Match matches[GramDb::kMaxResults];
int n = (std::min)(grammar::kMaxEncodedUnicode,
config_->collocation_max_length - 1);
int context_len = 0;
string context_query = grammar::encode(
last_n_unicode(context,
config_->collocation_max_length - 1,
context_len));
string word_query = grammar::encode(word);
last_n_unicode(context, n, context_len),
str_end(context));
int word_query_len = 0;
string word_query = grammar::encode(
str_begin(word),
first_n_unicode(word, n, word_query_len));
for (const char* context_ptr = str_begin(context_query);
context_len > 0;
--context_len, context_ptr = grammar::next_unicode(context_ptr)) {
Expand Down

0 comments on commit ef5af4f

Please sign in to comment.