Skip to content
This repository was archived by the owner on Sep 29, 2023. It is now read-only.

Commit 84a5eb2

Browse files
author
jchapuis
committed
multiple optimizations in TermsParsers
1 parent f3ceb83 commit 84a5eb2

File tree

5 files changed

+112
-63
lines changed

5 files changed

+112
-63
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name := "scala-parser-combinators-completion"
22
organization := "com.nexthink"
33
licenses += ("MIT", url("http://opensource.org/licenses/MIT"))
4-
version := "1.0.6"
4+
version := "1.0.7"
55
scalaVersion := "2.12.2"
66
bintrayRepository := "maven"
77
bintrayVcsUrl := Some("jchapuis@github.com:jchapuis/scala-parser-combinators-completion")

src/main/scala/com/nexthink/utils/parsing/combinator/completion/CompletionTypes.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ trait CompletionTypes {
215215
}
216216

217217
def completionStrings: Seq[String] =
218-
sets.values.toSeq
218+
sets.values.toStream
219219
.sortBy(_.score)
220220
.reverse
221221
.flatMap(_.stringEntries)
@@ -248,7 +248,7 @@ trait CompletionTypes {
248248
}
249249

250250
private def encodeJson(meta: JValue) = compact(render(meta))
251-
private def printJson(meta: JValue) = pretty(render(meta))
251+
private def printJson(meta: JValue) = pretty(render(meta))
252252

253253
case object Completions {
254254
def apply(position: Position, meta: Option[String], completionSets: Seq[(String, CompletionSet)]): Completions =
@@ -261,7 +261,7 @@ trait CompletionTypes {
261261
Completions(position, meta, CompletionSet(completions))
262262
def apply(position: Position, completions: Traversable[Elems]): Completions =
263263
Completions(position, None, CompletionSet(completions))
264-
def apply(position: Position, meta:Option[String], completionSets: Iterable[CompletionSet]): Completions =
264+
def apply(position: Position, meta: Option[String], completionSets: Iterable[CompletionSet]): Completions =
265265
Completions(position, meta, completionSets.map(s => s.tag.label -> s).toSeq)
266266
def apply(position: Position, completionSets: Iterable[CompletionSet]): Completions =
267267
Completions(position, None, completionSets.map(s => s.tag.label -> s).toSeq)

src/main/scala/com/nexthink/utils/parsing/combinator/completion/TermsParsers.scala

Lines changed: 71 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,20 @@ import scala.util.parsing.combinator.RegexParsers
1818
* completion (supporting fuzzy matching)
1919
*/
2020
trait TermsParsers extends RegexParsers with RegexCompletionSupport with TermsParsingHelpers {
21-
val DefaultMaxCompletionsCount = 15 // exposed
21+
val DefaultMaxCompletionsCount = 15 // exposed
2222
private val DefaultSimilarityThreshold = 20
2323
private val CompletionCandidatesMultiplierRatio = 3
2424

2525
/**
2626
* This defines a parser which parses any of the specified terms.
2727
* The parser performs a fast match by means of a trie data structure, initialized upon creation.
28-
* Completions will return all available terms below the matching trie node (if any)
28+
* Completions will return the terms below the matching trie node (if any), in alphabetical order, limited to maxCompletionsCount
2929
* @param terms the list of terms to build the parser for
30+
* @param maxCompletionsCount maximum number of completions returned by the parser
3031
* @return parser instance
3132
*/
32-
def oneOfTerms(terms: Seq[String]): Parser[String] = {
33-
new TermsParser(terms)
33+
def oneOfTerms(terms: Seq[String], maxCompletionsCount: Int = DefaultMaxCompletionsCount): Parser[String] = {
34+
TermsParser(terms, maxCompletionsCount)
3435
}
3536

3637
/**
@@ -59,17 +60,19 @@ trait TermsParsers extends RegexParsers with RegexCompletionSupport with TermsPa
5960
similarityMeasure: (String, String) => Double = diceSorensenSimilarity,
6061
similarityThreshold: Int = DefaultSimilarityThreshold,
6162
maxCompletionsCount: Int = DefaultMaxCompletionsCount): Parser[String] = {
62-
new FuzzyParser(terms, similarityMeasure, similarityThreshold, maxCompletionsCount)
63+
FuzzyParser(terms, similarityMeasure, similarityThreshold, maxCompletionsCount)
6364
}
6465

65-
sealed private class TermsParser(terms: Seq[String]) extends Parser[String] {
66-
protected def originalTerms: Seq[String] = terms.map(_.trim()).filter(_.nonEmpty)
67-
protected def normalizedTerms: Seq[String] = originalTerms.map(_.toLowerCase)
68-
69-
protected val trie: PrefixMap[String] = PrefixMap(normalizedTerms.zip(originalTerms).map {
70-
case (normalizedTerm, originalTerm) => (normalizedTerm, originalTerm)
71-
}: _*)
66+
private object TermsParser {
67+
def apply(terms: Seq[String], maxCompletionsCount: Int): TermsParser = {
68+
val trie = PrefixMap(normalizedTerms(terms).zip(trimmedNonEmptyTerms(terms)).map {
69+
case (normalizedTerm, originalTerm) => (normalizedTerm, originalTerm)
70+
}: _*)
71+
new TermsParser(trie, maxCompletionsCount)
72+
}
73+
}
7274

75+
sealed private class TermsParser(trie: PrefixMap[String], maxCompletionsCount: Int) extends Parser[String] {
7376
override def apply(in: Input): ParseResult[String] = {
7477
val start = dropAnyWhiteSpace(in)
7578
val (terms, finalPosition) =
@@ -90,57 +93,75 @@ trait TermsParsers extends RegexParsers with RegexCompletionSupport with TermsPa
9093
case Success(_, _) => Completions.empty
9194
case NoSuccess(_, _) =>
9295
val start = dropAnyWhiteSpace(in)
93-
val terms = findAllTermsWithPrefix(start, start.offset, trie)
94-
Completions(in.pos, CompletionSet(terms.map(t => Completion(t))))
96+
val terms = alphabeticalCompletions(findAllTermsWithPrefix(start, start.offset, trie), maxCompletionsCount)
97+
Completions(in.pos, terms)
9598
}
9699
}
97100
}
98101

99-
sealed private class FuzzyParser(terms: Seq[String], similarityMeasure: (String, String) => Double, similarityThreshold: Int, maxCompletionsCount: Int)
100-
extends TermsParser(terms) {
102+
private def trimmedNonEmptyTerms(terms: Seq[String]) = terms.map(_.trim()).filter(_.nonEmpty)
103+
private def normalizedTerms(terms: Seq[String]) = terms.map(_.toLowerCase)
104+
105+
private def lazyQuicksort[A](xs: Stream[A])(implicit o: Ordering[A]): Stream[A] = {
106+
import o._
107+
if (xs.isEmpty) xs
108+
else {
109+
val (smaller, bigger) = xs.tail.partition(_ < xs.head)
110+
lazyQuicksort(smaller) #::: xs.head #:: lazyQuicksort(bigger)
111+
}
112+
}
101113

102-
val ngramMap: PrefixMap[Array[String]] = {
114+
private def alphabeticalCompletions(terms: Iterable[String], maxCompletionsCount: Int): CompletionSet =
115+
CompletionSet(
116+
lazyQuicksort(terms.toStream)
117+
.take(maxCompletionsCount)
118+
.reverse
119+
.zipWithIndex
120+
.map {
121+
case (t, rank) => Completion(t, rank)
122+
}
123+
.toSet)
124+
125+
private object FuzzyParser {
126+
def apply(terms: Seq[String], similarityMeasure: (String, String) => Double, similarityThreshold: Int, maxCompletionsCount: Int): FuzzyParser = {
127+
val originals = trimmedNonEmptyTerms(terms)
128+
val normalized = normalizedTerms(terms)
129+
val completionsWhenInputEmpty = alphabeticalCompletions(originals, maxCompletionsCount)
103130
val trigramTermPairs =
104-
normalizedTerms.zip(originalTerms).par.flatMap {
131+
normalized.zip(originals).par.flatMap {
105132
case (normalizedTerm, originalTerm) =>
106133
tokenizeWords(normalizedTerm).flatMap(trigramsWithAffixing).map(trigram => trigram -> originalTerm)
107134
}
108-
PrefixMap(trigramTermPairs.groupBy(_._1).mapValues(_.map(_._2).toArray).toSeq.seq: _*)
135+
val ngramMap = PrefixMap(trigramTermPairs.groupBy(_._1).mapValues(_.map(_._2).toArray).toSeq.seq: _*)
136+
val trie = PrefixMap(normalized.zip(originals).map {
137+
case (normalizedTerm, originalTerm) => (normalizedTerm, originalTerm)
138+
}: _*)
139+
new FuzzyParser(completionsWhenInputEmpty, ngramMap, trie, similarityMeasure, similarityThreshold, maxCompletionsCount)
109140
}
141+
}
142+
143+
sealed private class FuzzyParser private (completionsWhenInputEmpty: CompletionSet,
144+
ngramMap: PrefixMap[Array[String]],
145+
trie: PrefixMap[String],
146+
similarityMeasure: (String, String) => Double,
147+
similarityThreshold: Int,
148+
maxCompletionsCount: Int)
149+
extends TermsParser(trie, maxCompletionsCount) {
110150

111151
override def completions(in: Input): Completions = {
112152
apply(in) match {
113153
case Success(_, _) => Completions.empty
114154
case NoSuccess(_, _) =>
115155
val start = dropAnyWhiteSpace(in)
116156
if (start.atEnd) {
117-
// return everything
118-
alphabeticalCompletions(start)
157+
Completions(in.pos, completionsWhenInputEmpty)
119158
} else {
120159
fuzzyCompletions(start)
121160
}
122161
}
123162
}
124163

125-
val maxCandidatesCount: Int = maxCompletionsCount * CompletionCandidatesMultiplierRatio
126-
127-
private def alphabeticalCompletions(in: Input): Completions = {
128-
val matches = ngramMap
129-
.withPrefix(remainder(in).toLowerCase)
130-
.values
131-
.flatten
132-
.toList
133-
.distinct
134-
.sorted
135-
.take(maxCompletionsCount)
136-
if (matches.nonEmpty) {
137-
Completions(in.pos, CompletionSet(matches.reverse.zipWithIndex.map {
138-
case (t, rank) => Completion(t, rank)
139-
}.toSet))
140-
} else {
141-
Completions.empty
142-
}
143-
}
164+
private val maxCandidatesCount: Int = maxCompletionsCount * CompletionCandidatesMultiplierRatio
144165

145166
private def findAndScoreNgramMatches(ngrams: Seq[String]): Map[String, Int] = {
146167
def iter(ngram: String, remainingNgrams: Seq[String], termsFromPreviousIter: Set[String], acc: Map[String, Int]): Map[String, Int] = {
@@ -163,15 +184,15 @@ trait TermsParsers extends RegexParsers with RegexCompletionSupport with TermsPa
163184
private def fuzzyCompletions(in: Input): Completions = {
164185
val incompleteTerm = remainder(in)
165186
val candidates = findCandidateMatches(incompleteTerm)
166-
val rankedCompletions = candidates
167-
.map {
168-
case (candidateTerm, _) =>
169-
(candidateTerm, math.round(similarityMeasure(incompleteTerm, candidateTerm) * 100.0).toInt)
170-
}
171-
.filter { case (_, similarity) => similarity >= similarityThreshold }
172-
.sortBy {
173-
case (term, similarity) => (-similarity, term)
174-
}
187+
val rankedCompletions = lazyQuicksort(
188+
candidates.toStream
189+
.map {
190+
case (candidateTerm, _) =>
191+
(candidateTerm, math.round(similarityMeasure(incompleteTerm, candidateTerm) * 100.0).toInt)
192+
}
193+
.filter { case (_, similarity) => similarity >= similarityThreshold })(Ordering.by({
194+
case (term, similarity) => (-similarity, term)
195+
}))
175196
.take(maxCompletionsCount)
176197
if (rankedCompletions.nonEmpty) {
177198
Completions(in.pos, CompletionSet(rankedCompletions.map {
@@ -185,7 +206,7 @@ trait TermsParsers extends RegexParsers with RegexCompletionSupport with TermsPa
185206
private def findCandidateMatches(incompleteTerm: String): Seq[(String, Int)] = {
186207
val trigrams = trigramsWithAffixing(incompleteTerm.toLowerCase)
187208
val matchingTerms: Map[String, Int] = findAndScoreNgramMatches(trigrams)
188-
matchingTerms.toSeq.sortBy(_._2).reverse.take(maxCandidatesCount)
209+
matchingTerms.toSeq.sortBy(_._2).view.reverse.take(maxCandidatesCount)
189210
}
190211
}
191212

src/main/scala/com/nexthink/utils/parsing/combinator/completion/TermsParsingHelpers.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,25 @@ trait TermsParsingHelpers { this: RegexParsers =>
2323
reader.source.subSequence(start, end).toString
2424
private def lastPosition[T](reader: Reader[T]): Int = reader.source.length
2525

26-
protected def findAllMatchingTerms(in: Input, pos: Int, map: PrefixMap[String]): (Seq[(String, Int)], Int) = {
27-
def findAllMatchingTermsIter(in: Input, pos: Int, map: PrefixMap[String], prevMatches: Seq[(String, Int)]): (Seq[(String, Int)], Int) = {
26+
protected def findAllMatchingTerms(in: Input, pos: Int, map: PrefixMap[String]): (Stream[(String, Int)], Int) = {
27+
def findAllMatchingTermsIter(in: Input, pos: Int, map: PrefixMap[String], prevMatches: Stream[(String, Int)]): (Stream[(String, Int)], Int) = {
2828
lazy val nextSuffixChar = charAtPosition(in, pos)
2929
if (handleWhiteSpace(in.source, pos) < lastPosition(in) && map.hasSuffix(nextSuffixChar)) {
3030
findAllMatchingTermsIter(in, pos + 1, map.withPrefix(nextSuffixChar), prevMatches ++ map.value.map((_, pos)))
3131
} else {
3232
(prevMatches ++ map.value.map((_, pos)), pos)
3333
}
3434
}
35-
findAllMatchingTermsIter(in, pos, map, Seq())
35+
findAllMatchingTermsIter(in, pos, map, Stream())
3636
}
3737

38-
protected def findAllTermsWithPrefix(in: Input, pos: Int, map: PrefixMap[String]): Seq[String] = {
39-
def findAllTermsWithPrefixIter(in: Input, pos: Int, map: PrefixMap[String]): Seq[String] = {
38+
protected def findAllTermsWithPrefix(in: Input, pos: Int, map: PrefixMap[String]): Stream[String] = {
39+
def findAllTermsWithPrefixIter(in: Input, pos: Int, map: PrefixMap[String]): Stream[String] = {
4040
lazy val nextSuffixChar = charAtPosition(in, pos)
4141
if (handleWhiteSpace(in.source, pos) < lastPosition(in) && map.hasSuffix(nextSuffixChar)) {
4242
findAllTermsWithPrefixIter(in, pos + 1, map.withPrefix(nextSuffixChar))
4343
} else {
44-
map.toSeq.map { case (_, term) => term }
44+
map.toStream.map { case (_, term) => term }
4545
}
4646
}
4747
findAllTermsWithPrefixIter(in, pos, map)

src/test/scala/com/nexthink/utils/parsing/combinator/completion/TermsParsersTest.scala

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,17 @@ class TermsParsersTest extends PropSpec with PropertyChecks with Matchers with I
3535
val samples = Table(
3636
"skyp" -> "Skype, Skype Handsfree Support, Skype Monitor",
3737
"NEXT" -> "NEXThink Finder",
38-
"A" -> "Activity Monitor, Adobe Acrobat"
38+
"A" -> "Activity Monitor, Adobe Acrobat"
3939
)
4040
forAll(samples) { (partial: String, options: String) =>
4141
val completedTerms = options.split(",").map(_.trim)
42-
val completions = termsParsers$.completeString(terms, partial)
42+
val completions = termsParsers$.completeString(terms, partial)
4343
completions shouldBe completedTerms
4444
}
4545
}
4646

4747
property("oneOfTerms returns correct next") {
48-
val terms = termsParsers$.oneOfTerms(examples)
48+
val terms = termsParsers$.oneOfTerms(examples)
4949
val result = termsParsers$.parse(terms, "skype h")
5050
result.successful shouldBe true
5151
result.next.pos.column shouldBe 6
@@ -89,6 +89,34 @@ class TermsParsersTest extends PropSpec with PropertyChecks with Matchers with I
8989
}
9090
}
9191

92+
property("oneOfTerms with empty completes with all terms in alphabetical order") {
93+
forAll(sampleTerms) { terms: List[String] =>
94+
{
95+
val parser = termsParsers$.oneOfTerms(terms)
96+
val completions = termsParsers$.complete(parser, " ")
97+
withClue(s"terms=$terms, completions=$completions") {
98+
completions.defaultSet.isDefined shouldBe true
99+
terms.distinct.sorted.zipAll(completions.completionStrings, "extraCompletion", "missingCompletion").foreach {
100+
case (expected, actual) => actual === expected
101+
}
102+
}
103+
}
104+
}
105+
}
106+
107+
property("oneOfTerms with empty spaces completes at the last relevant input position") {
108+
forAll(sampleTerms, Gen.chooseNum(1, 10)) { (terms: List[String], spacesCount: Int) =>
109+
{
110+
val spaces = Seq.range(0, spacesCount).map(_ => " ").mkString
111+
val parser = termsParsers$.oneOfTerms(terms)
112+
val completions = termsParsers$.complete(parser, spaces)
113+
withClue(s"terms=$terms, completions=$completions") {
114+
completions.position.column shouldBe 1
115+
}
116+
}
117+
}
118+
}
119+
92120
property("oneOfTermsFuzzy with empty completes with all terms in alphabetical order") {
93121
forAll(sampleTerms) { terms: List[String] =>
94122
{

0 commit comments

Comments
 (0)