Skip to content
This repository was archived by the owner on Sep 29, 2023. It is now read-only.

Commit 0e99a22

Browse files
author
Jonas Chapuis
authored
Merge pull request #1 from jchapuis/expansion
Expansion mechanism
2 parents 40eee56 + d57cb24 commit 0e99a22

File tree

9 files changed

+354
-105
lines changed

9 files changed

+354
-105
lines changed

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,11 @@ Completing on the same expressions (`2`, `2+`, `(10*2`) now leads to the followi
233233

234234
## Fuzzy completion
235235

236-
This library also provides special parsers which support fuzzy completion, present in the `FuzzyParsers` trait, by means of the `oneOfTerms` method capable of fuzzing completion on the input to match a set of terms (note that parsing itself obviously requires an exact match and is really fast thanks to a prefix trie lookup on each input char). For instance, with the following dummy grammar:
236+
This library also provides special parsers which support fuzzy completion, present in the `TermsParsers` trait, by means of the `oneOfTermsFuzzy` method, which fuzzily matches the input against a set of terms when completing (note that parsing itself requires an exact match and is fast thanks to a prefix trie lookup on each input char). For instance, with the following dummy grammar:
237237

238238
```scala
239-
object Grammar extends FuzzyParsers {
240-
val fuzzyCountries = "my favourite country is " ~ oneOfTerms(Seq("United States of America", "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua & Deps", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina", "Burma", "Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Central African Rep", "Chad", "Chile", "People's Republic of China", "Republic of China", "Colombia", "Comoros", "Democratic Republic of the Congo", "Republic of the Congo", "Costa Rica,", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Danzig", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "East Timor", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gaza Strip", "The Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Holy Roman Empire", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Republic of Ireland", "Israel", "Italy", "Ivory Coast", "Jamaica", "Japan", "Jonathanland", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "North Korea", "South Korea", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mount Athos", "Mozambique", "Namibia", "Nauru", "Nepal", "Newfoundland", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Norway", "Oman", "Ottoman Empire", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Prussia", "Qatar", "Romania", "Rome", 
"Russian Federation", "Rwanda", "St Kitts & Nevis", "St Lucia", "Saint Vincent & the", "Grenadines", "Samoa", "San Marino", "Sao Tome & Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Swaziland", "Sweden", "Switzerland", "Syria", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad & Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "Uruguay", "Uzbekistan", "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"))
239+
object Grammar extends TermsParsers {
240+
val fuzzyCountries = "my favourite country is " ~ oneOfTermsFuzzy(Seq("United States of America", "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua & Deps", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina", "Burma", "Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Central African Rep", "Chad", "Chile", "People's Republic of China", "Republic of China", "Colombia", "Comoros", "Democratic Republic of the Congo", "Republic of the Congo", "Costa Rica,", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Danzig", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "East Timor", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gaza Strip", "The Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Holy Roman Empire", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Republic of Ireland", "Israel", "Italy", "Ivory Coast", "Jamaica", "Japan", "Jonathanland", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "North Korea", "South Korea", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mount Athos", "Mozambique", "Namibia", "Nauru", "Nepal", "Newfoundland", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Norway", "Oman", "Ottoman Empire", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Prussia", "Qatar", "Romania", 
"Rome", "Russian Federation", "Rwanda", "St Kitts & Nevis", "St Lucia", "Saint Vincent & the", "Grenadines", "Samoa", "San Marino", "Sao Tome & Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Swaziland", "Sweden", "Switzerland", "Syria", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad & Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "Uruguay", "Uzbekistan", "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"))
241241
}
242242
```
243243

@@ -300,15 +300,15 @@ leads to:
300300
}
301301
```
302302

303-
### `oneOfTerms` parameters
303+
### `oneOfTermsFuzzy` parameters
304304

305-
Below the signature of the `oneOfTerms` method:
305+
Below is the signature of the `oneOfTermsFuzzy` method:
306306

307307
```scala
308-
def oneOfTerms(terms: Seq[String],
309-
similarityMeasure: (String, String) => Double = diceSorensenSimilarity,
310-
similarityThreshold: Int = DefaultSimilarityThreshold,
311-
maxCompletionsCount: Int = DefaultMaxCompletionsCount)
308+
def oneOfTermsFuzzy(terms: Seq[String],
309+
similarityMeasure: (String, String) => Double = diceSorensenSimilarity,
310+
similarityThreshold: Int = DefaultSimilarityThreshold,
311+
maxCompletionsCount: Int = DefaultMaxCompletionsCount)
312312
```
313313

314314
- `terms`: the list of terms to build the parser for

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name := "scala-parser-combinators-completion"
22
organization := "com.nexthink"
33
licenses += ("MIT", url("http://opensource.org/licenses/MIT"))
4-
version := "1.0.1"
4+
version := "1.0.2"
55
scalaVersion := "2.12.2"
66
bintrayRepository := "maven"
77
bintrayVcsUrl := Some("jchapuis@github.com:jchapuis/scala-parser-combinators-completion")
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* scala-parser-combinators-completion
3+
* Copyright (c) by Nexthink S.A.
4+
* Lausanne, Switzerland (http://www.nexthink.com)
5+
* Author: jonas.chapuis@nexthink.com
6+
*/
7+
8+
package com.nexthink.utils.parsing.combinator.completion
9+
10+
import scala.util.parsing.input.{CharSequenceReader, OffsetPosition, Position, Reader}
11+
12+
trait CompletionExpansionSupport extends RegexCompletionSupport {

  /**
    * Adapts a parser so that completing it will list all possible expanded completions (which successfully parse)
    * (note that if this is used within the context of a grammar allowing for infinitely growing expressions, this
    * will trigger infinite recursion and will end up in a `StackOverflowError`)
    * @param p the parser
    * @param onlyAtInputEnd expansion happens only when nothing but whitespace remains in the input upon completion
    * @tparam T the parser type
    * @return a parser adapter performing completion expansion
    */
  def allExpandedCompletions[T](p: Parser[T], onlyAtInputEnd: Boolean = true): Parser[T] =
    expandedCompletions(p, p, onlyAtInputEnd) // the parser itself acts as the stop condition

  /**
    * Adapts a parser so that completing it will construct the list of all possible alternatives up to the point
    * where the passed `stop` parser successfully parses the expansions.
    * (note that if this is used within the context of a grammar allowing for infinitely growing expressions,
    * selecting the relevant stop parser is critical to avoid infinite recursion)
    * @param p the parser
    * @param stop the parser signalling the end of exploration upon successful parse
    * @param onlyAtInputEnd expansion happens only when nothing but whitespace remains in the input upon completion;
    *                       otherwise the regular completions of `p` are returned
    * @tparam T the parser type
    * @return a parser adapter performing completion expansion limited according to `stop` parser
    */
  def expandedCompletions[T](p: Parser[T], stop: Parser[Any], onlyAtInputEnd: Boolean = true): Parser[T] =
    Parser(
      p,
      in =>
        // `||` short-circuits, so the whitespace skipping only happens when `onlyAtInputEnd` is set
        if (!onlyAtInputEnd || dropAnyWhiteSpace(in).atEnd) {
          val Completions(_, sets) = exploreCompletions(p, stop, in)
          // keep the explored sets but re-anchor them at the offset computed by `handleWhiteSpace`
          Completions(OffsetPosition(in.source, handleWhiteSpace(in)), sets)
        } else {
          p.completions(in)
        }
    )

  /**
    * Recursively follows the completions of `p`, starting from the remaining input of `in`, until each candidate
    * input is accepted by `stop` (or no further completion is available), and merges all results with `|`.
    * @param p the parser whose completions are explored
    * @param stop the parser signalling the end of exploration upon successful parse
    * @param in the input from which exploration starts
    * @tparam T the parser type
    * @return the union of the explored completions, or the plain completions of `p` when `in` is already an
    *         exploration of `p`
    */
  private def exploreCompletions[T](p: Parser[T], stop: Parser[Any], in: Input): Completions = {
    // Builds the input obtained by keeping `s` up to the (1-based) column `position` and appending completion `c`.
    // NOTE(review): a column is used as a string index, which presumably assumes single-line input - confirm
    def completeString(s: String, position: Int, c: Completion): String = {
      val input = s.substring(0, position - 1)
      if (input.trim.isEmpty) c.value.toString() else s"$input ${c.value}"
    }

    def exploreCompletionsRec(str: String, completions: Completions): Completions =
      if (completions.isEmpty) completions
      else
        completions.allSets
          .map { cSet =>
            cSet.completions
              .map { c =>
                val completedInput = completeString(str, completions.position.column, c)
                if (stop(new CharSequenceReader(completedInput)).successful) {
                  // the stop parser accepts this candidate: report it as a single, fully expanded completion
                  Completions(in.pos, CompletionSet(cSet.tag, Set(Completion(completedInput, c.score, c.kind))))
                } else {
                  // not accepted yet: keep expanding from the completed input
                  exploreCompletionsRec(completedInput, p.completions(ExplorerReader(p, completedInput)))
                }
              }
              // `reduceOption` instead of `reduce`: a set without completions must not throw
              .reduceOption((a, b) => a | b)
              .getOrElse(Completions.empty)
          }
          .reduceOption((a, b) => a | b)
          .getOrElse(Completions.empty)

    in match {
      case ExplorerReader(exploredParser, _, _) if exploredParser == p =>
        // recursive parser => avoid infinite exploration
        p.completions(in)
      case _ =>
        val inputAtPosition =
          if (in.atEnd) "" else in.source.subSequence(in.offset, in.source.length()).toString.trim
        exploreCompletionsRec(inputAtPosition, p.completions(ExplorerReader(p, inputAtPosition)))
    }
  }

  /**
    * Character reader which remembers the parser being explored, so that `exploreCompletions` can recognise an
    * exploration of the same parser and stop recursing. Reading is delegated to a `CharSequenceReader`.
    * @param exploredParser the parser under exploration
    * @param source the explored input
    * @param offset the current offset within `source`
    */
  private case class ExplorerReader(exploredParser: Parser[_], override val source: java.lang.CharSequence, override val offset: Int)
      extends Reader[Char] {
    private val charReader = new CharSequenceReader(source, offset)

    def first: Char = charReader.first

    def rest: ExplorerReader =
      if (offset < source.length) new ExplorerReader(exploredParser, source, offset + 1)
      else this

    def pos: Position = charReader.pos

    def atEnd: Boolean = charReader.atEnd

    // overridden so that dropping characters keeps the `ExplorerReader` type (and thus the explored parser)
    override def drop(n: Int): ExplorerReader = new ExplorerReader(exploredParser, source, offset + n)
  }

  private object ExplorerReader {

    /** Creates a reader positioned at the start of `source`. */
    def apply(exploredParser: Parser[_], source: java.lang.CharSequence): ExplorerReader = ExplorerReader(exploredParser, source, 0)
  }

}

src/main/scala/com/nexthink/utils/parsing/combinator/completion/CompletionSupport.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77

88
package com.nexthink.utils.parsing.combinator.completion
99

10+
import java.io.StringReader
11+
1012
import scala.util.parsing.combinator.Parsers
11-
import scala.util.parsing.input.Positional
13+
import scala.util.parsing.input.{CharSequenceReader, Positional}
1214
import scala.language.implicitConversions
1315

1416
// scalastyle:off method.name
@@ -895,4 +897,6 @@ trait CompletionSupport extends Parsers with CompletionTypes {
895897
*/
896898
def phrase[T](p: Parser[T]): Parser[T] = {
  // parsing goes through the inherited `phrase`, while completion requests are answered by `p` itself
  val phraseParser = super.phrase(p)
  Parser(phraseParser, p.completions)
}
900+
901+
898902
}

src/main/scala/com/nexthink/utils/parsing/combinator/completion/RegexCompletionSupport.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ package com.nexthink.utils.parsing.combinator.completion
99

1010
import scala.util.matching.Regex
1111
import scala.util.parsing.combinator.RegexParsers
12-
import scala.util.parsing.input.{CharSequenceReader, OffsetPosition, Positional, Reader}
12+
import scala.util.parsing.input._
1313
import scala.language.implicitConversions
1414

1515
/** This component extends `RegexParsers` with completion capability. In particular,
@@ -22,7 +22,7 @@ import scala.language.implicitConversions
2222
trait RegexCompletionSupport extends RegexParsers with CompletionSupport {
2323
protected val areLiteralsCaseSensitive = false
2424

25-
protected def dropWhiteSpace(input: Input): Input =
25+
/**
  * Advances the given input past the whitespace found at its current offset.
  * @param input the input to advance
  * @return the input moved forward to the offset returned by `handleWhiteSpace`
  */
protected def dropAnyWhiteSpace(input: Input): Input = {
  val offsetAfterWhiteSpace = handleWhiteSpace(input.source, input.offset)
  val skippedCount          = offsetAfterWhiteSpace - input.offset
  input.drop(skippedCount)
}
2727

2828
protected def handleWhiteSpace(input: Input): Int =
@@ -36,8 +36,7 @@ trait RegexCompletionSupport extends RegexParsers with CompletionSupport {
3636
var sourcePos = start
3737
def charsEqual(a: Char, b: Char) =
3838
if (areLiteralsCaseSensitive) a == b else a.toLower == b.toLower
39-
while (literalPos < s.length && sourcePos < source.length && charsEqual(s.charAt(literalPos),
40-
source.charAt(sourcePos))) {
39+
while (literalPos < s.length && sourcePos < source.length && charsEqual(s.charAt(literalPos), source.charAt(sourcePos))) {
4140
literalPos += 1
4241
sourcePos += 1
4342
}
@@ -55,8 +54,7 @@ trait RegexCompletionSupport extends RegexParsers with CompletionSupport {
5554
literalOffset match {
5655
case 0 if inputAtEnd =>
5756
literalCompletion // whitespace, free entry possible
58-
case someOffset: Int
59-
if inputAtEnd & someOffset > 0 & someOffset < s.length => // partially entered literal, we are at the end
57+
case someOffset: Int if inputAtEnd & someOffset > 0 & someOffset < s.length => // partially entered literal, we are at the end
6058
literalCompletion
6159
case _ => Completions.empty
6260
}
@@ -84,3 +82,5 @@ trait RegexCompletionSupport extends RegexParsers with CompletionSupport {
8482
complete(p, input).completionStrings
8583

8684
}
85+
86+

0 commit comments

Comments
 (0)