From d2a538f6ceae938caadaa476dc31494689832cc3 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Wed, 10 Nov 2021 16:59:07 -1000 Subject: [PATCH 1/4] Add Parser.caret --- .../main/scala/cats/parse/LocationMap.scala | 35 ++++++++++++------- .../src/main/scala/cats/parse/Parser.scala | 28 +++++++++++---- .../scala/cats/parse/LocationMapTest.scala | 32 +++++++++++++++++ .../test/scala/cats/parse/ParserTest.scala | 15 +++++++- 4 files changed, 91 insertions(+), 19 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/LocationMap.scala b/core/shared/src/main/scala/cats/parse/LocationMap.scala index 5adb9fc2..02481f88 100644 --- a/core/shared/src/main/scala/cats/parse/LocationMap.scala +++ b/core/shared/src/main/scala/cats/parse/LocationMap.scala @@ -66,22 +66,29 @@ class LocationMap(val input: String) { */ def toLineCol(offset: Int): Option[(Int, Int)] = if (offset < 0 || offset > input.length) None + else { + val Caret(_, row, col) = toCaretUnsafe(offset) + Some((row, col)) + } + + /** Convert an offset to a Caret. 
+ * @throws IllegalArgumentException + * if offset is longer than input + */ + def toCaretUnsafe(offset: Int): Caret = + if (offset < 0 || offset > input.length) + throw new IllegalArgumentException(s"offset = $offset exceeds ${input.length}") else if (offset == input.length) { // this is end of line - if (offset == 0) Some((0, 0)) + if (offset == 0) Caret.Start else { - toLineCol(offset - 1) - .map { case (line, col) => - if (endsWithNewLine) (line + 1, 0) - else (line, col + 1) - } + val Caret(_, line, col) = toCaretUnsafe(offset - 1) + if (endsWithNewLine) Caret(offset, line + 1, 0) + else Caret(offset, line, col + 1) } } else { val idx = Arrays.binarySearch(firstPos, offset) - if (idx == firstPos.length) { - // greater than all elements - None - } else if (idx < 0) { + if (idx < 0) { // idx = (~(insertion pos) - 1) // The insertion point is defined as the point at which the key would be // inserted into the array: the index of the first element greater than @@ -92,13 +99,17 @@ class LocationMap(val input: String) { // so we are pointing into a row val rowStart = firstPos(row) val col = offset - rowStart - Some((row, col)) + Caret(offset, row, col) } else { // idx is exactly the right value because offset is beginning of a line - Some((idx, 0)) + Caret(offset, idx, 0) } } + def toCaret(offset: Int): Option[Caret] = + if (offset < 0 || offset > input.length) None + else Some(toCaretUnsafe(offset)) + /** return the line without a newline */ def getLine(i: Int): Option[String] = diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index af0c041c..df37e475 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1608,7 +1608,7 @@ object Parser { case str if Impl.matchesString(str) => str.asInstanceOf[Parser0[String]] case _ => Impl.unmap0(pa) match { - case Impl.Pure(_) | Impl.Index => emptyStringParser0 + case Impl.Pure(_) | Impl.Index 
| Impl.GetCaret => emptyStringParser0 case notEmpty => Impl.StringP0(notEmpty) } } @@ -1662,6 +1662,11 @@ object Parser { */ def index: Parser0[Int] = Impl.Index + /** return the current Caret (offset, line, column) this is a bit more expensive than just the + * index + */ + def caret: Parser0[Caret] = Impl.GetCaret + /** succeeds when we are at the start */ def start: Parser0[Unit] = Impl.StartParser @@ -1696,7 +1701,7 @@ object Parser { case p1: Parser[_] => as(p1, b) case _ => Impl.unmap0(pa) match { - case Impl.Pure(_) | Impl.Index => pure(b) + case Impl.Pure(_) | Impl.Index | Impl.GetCaret => pure(b) case notPure => Impl.Void0(notPure).map(Impl.ConstFn(b)) } @@ -1816,6 +1821,10 @@ object Parser { var offset: Int = 0 var error: Eval[Chain[Expectation]] = null var capture: Boolean = true + + // This is lazy because we don't want to trigger it + // unless someone uses GetCaret + lazy val locationMap: LocationMap = LocationMap(str) } // invariant: input must be sorted @@ -1864,8 +1873,9 @@ object Parser { final def doesBacktrack(p: Parser0[Any]): Boolean = p match { case Backtrack0(_) | Backtrack(_) | AnyChar | CharIn(_, _, _) | Str(_) | IgnoreCase(_) | - Length(_) | StartParser | EndParser | Index | Pure(_) | Fail() | FailWith(_) | Not(_) | - StringIn(_) => + Length(_) | StartParser | EndParser | Index | GetCaret | Pure(_) | Fail() | FailWith( + _ + ) | Not(_) | StringIn(_) => true case Map0(p, _) => doesBacktrack(p) case Map(p, _) => doesBacktrack(p) @@ -1895,7 +1905,7 @@ object Parser { // and by construction, a oneOf0 never always succeeds final def alwaysSucceeds(p: Parser0[Any]): Boolean = p match { - case Index | Pure(_) => true + case Index | GetCaret | Pure(_) => true case Map0(p, _) => alwaysSucceeds(p) case SoftProd0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) case Prod0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) @@ -1913,7 +1923,7 @@ object Parser { def unmap0(pa: Parser0[Any]): Parser0[Any] = pa match { case p1: Parser[Any] => unmap(p1) - 
case Pure(_) | Index => Parser.unit + case GetCaret | Index | Pure(_) => Parser.unit case s if alwaysSucceeds(s) => Parser.unit case Map0(p, _) => // we discard any allocations done by fn @@ -2151,6 +2161,12 @@ object Parser { override def parseMut(state: State): Int = state.offset } + case object GetCaret extends Parser0[Caret] { + override def parseMut(state: State): Caret = + // This unsafe call is safe because the offset can never go too far + state.locationMap.toCaretUnsafe(state.offset) + } + final def backtrack[A](pa: Parser0[A], state: State): A = { val offset = state.offset val a = pa.parseMut(state) diff --git a/core/shared/src/test/scala/cats/parse/LocationMapTest.scala b/core/shared/src/test/scala/cats/parse/LocationMapTest.scala index f77ffd84..6d5d446e 100644 --- a/core/shared/src/test/scala/cats/parse/LocationMapTest.scala +++ b/core/shared/src/test/scala/cats/parse/LocationMapTest.scala @@ -216,4 +216,36 @@ class LocationMapTest extends munit.ScalaCheckSuite { assert(s.endsWith(lm.getLine(lm.lineCount - 1).get)) } } + + property("toLineCol and toCaret are consistent") { + forAll { (s: String, other: Int) => + val lm = LocationMap(s) + (0 to s.length).foreach { offset => + val c = lm.toCaretUnsafe(offset) + val oc = lm.toCaret(offset) + val lc = lm.toLineCol(offset) + + assertEquals(oc, Some(c)) + assertEquals(lc, oc.map { case Caret(_, r, c) => (r, c) }) + } + + if (other < 0 || s.length < other) { + assert(scala.util.Try(lm.toCaretUnsafe(other)).isFailure) + assertEquals(lm.toCaret(other), None) + assertEquals(lm.toLineCol(other), None) + } + } + } + + property("Caret ordering matches offset ordering") { + forAll { (s: String, o1: Int, o2: Int) => + val lm = LocationMap(s) + val c1 = lm.toCaret(o1) + val c2 = lm.toCaret(o2) + + if (c1.isDefined && c2.isDefined) { + assertEquals(Ordering[Option[Caret]].compare(c1, c2), Integer.compare(o1, o2)) + } + } + } } diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala 
b/core/shared/src/test/scala/cats/parse/ParserTest.scala index d0e8b5d7..aca7c29f 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -70,6 +70,9 @@ object ParserGen { def map[A, B](ga: Gen[A])(fn: A => B) = ga.map(fn) } + implicit val cogenCaret: Cogen[Caret] = + Cogen { case Caret(o, row, col) => (o.toLong << 32) | (col.toLong << 16) | (row.toLong) } + def arbGen[A: Arbitrary: Cogen]: GenT[Gen] = GenT(Arbitrary.arbitrary[A]) @@ -516,7 +519,7 @@ object ParserGen { (5, expect0), (1, ignoreCase0), (5, charIn0), - (1, Gen.oneOf(GenT(Parser.start), GenT(Parser.end), GenT(Parser.index))), + (1, Gen.oneOf(GenT(Parser.start), GenT(Parser.end), GenT(Parser.index), GenT(Parser.caret))), (1, fail), (1, failWith), (1, rec.map(void0(_))), @@ -2460,4 +2463,14 @@ class ParserTest extends munit.ScalaCheckSuite { assertEquals(v1.void, v1) } } + + property("P.caret is the same as index + toCaretUnsafe") { + forAll(ParserGen.gen, Arbitrary.arbitrary[String]) { (p, input) => + val v1 = p.fa.void + val lm = LocationMap(input) + val left = (v1 *> Parser.index).map(lm.toCaretUnsafe(_)).parse(input) + val right = (v1 *> Parser.caret).parse(input) + assertEquals(left, right) + } + } } From f757f2c856a271bceabb07dbb072349a91dccf46 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Wed, 10 Nov 2021 17:06:19 -1000 Subject: [PATCH 2/4] actually add Caret --- .../src/main/scala/cats/parse/Caret.scala | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 core/shared/src/main/scala/cats/parse/Caret.scala diff --git a/core/shared/src/main/scala/cats/parse/Caret.scala b/core/shared/src/main/scala/cats/parse/Caret.scala new file mode 100644 index 00000000..9a7f9a8f --- /dev/null +++ b/core/shared/src/main/scala/cats/parse/Caret.scala @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this 
software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package cats.parse + +import cats.Order + +/** This is a pointer to a zero based row, column, and total offset. 
+ */ +case class Caret(offset: Int, row: Int, col: Int) + +object Caret { + val Start: Caret = Caret(0, 0, 0) + + implicit val caretOrder: Order[Caret] = + new Order[Caret] { + def compare(left: Caret, right: Caret): Int = { + val c0 = Integer.compare(left.row, right.row) + if (c0 != 0) c0 + else { + val c1 = Integer.compare(left.col, right.col) + if (c1 != 0) c1 + else Integer.compare(left.offset, right.offset) + } + } + } + + implicit val caretOrdering: Ordering[Caret] = + caretOrder.toOrdering +} From d3d1b9ec55e6c207179711cb182afc2507a84aee Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Wed, 10 Nov 2021 17:19:26 -1000 Subject: [PATCH 3/4] fix 2.11 compilation --- core/shared/src/test/scala/cats/parse/ParserTest.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index aca7c29f..e4c2c72b 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -71,7 +71,9 @@ object ParserGen { } implicit val cogenCaret: Cogen[Caret] = - Cogen { case Caret(o, row, col) => (o.toLong << 32) | (col.toLong << 16) | (row.toLong) } + Cogen { caret: Caret => + (caret.offset.toLong << 32) | (caret.col.toLong << 16) | (caret.row.toLong) + } def arbGen[A: Arbitrary: Cogen]: GenT[Gen] = GenT(Arbitrary.arbitrary[A]) From bcff7b19de43536904404337d07d9d76dc1ced3a Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Fri, 12 Nov 2021 07:46:59 -1000 Subject: [PATCH 4/4] respond to review --- .../main/scala/cats/parse/LocationMap.scala | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/LocationMap.scala b/core/shared/src/main/scala/cats/parse/LocationMap.scala index 02481f88..6234af1c 100644 --- a/core/shared/src/main/scala/cats/parse/LocationMap.scala +++ 
b/core/shared/src/main/scala/cats/parse/LocationMap.scala @@ -60,29 +60,28 @@ class LocationMap(val input: String) { */ def lineCount: Int = lines.length + def isValidOffset(offset: Int): Boolean = + (0 <= offset && offset <= input.length) + /** Given a string offset return the line and column If input.length is given (EOF) we return the * same value as if the string were one character longer (i.e. if we have appended a non-newline * character at the EOF) */ def toLineCol(offset: Int): Option[(Int, Int)] = - if (offset < 0 || offset > input.length) None - else { - val Caret(_, row, col) = toCaretUnsafe(offset) + if (isValidOffset(offset)) { + val Caret(_, row, col) = toCaretUnsafeImpl(offset) Some((row, col)) - } + } else None - /** Convert an offset to a Caret. - * @throws IllegalArgumentException - * if offset is longer than input - */ - def toCaretUnsafe(offset: Int): Caret = - if (offset < 0 || offset > input.length) - throw new IllegalArgumentException(s"offset = $offset exceeds ${input.length}") - else if (offset == input.length) { + // This does not do bounds checking because we + // don't want to check twice. Callers to this need to + // do bounds check + private def toCaretUnsafeImpl(offset: Int): Caret = + if (offset == input.length) { // this is end of line if (offset == 0) Caret.Start else { - val Caret(_, line, col) = toCaretUnsafe(offset - 1) + val Caret(_, line, col) = toCaretUnsafeImpl(offset - 1) if (endsWithNewLine) Caret(offset, line + 1, 0) else Caret(offset, line, col + 1) } @@ -106,9 +105,17 @@ class LocationMap(val input: String) { } } + /** Convert an offset to a Caret. 
+ * @throws IllegalArgumentException + * if offset is negative or greater than input.length + */ + def toCaretUnsafe(offset: Int): Caret = + if (isValidOffset(offset)) toCaretUnsafeImpl(offset) + else throw new IllegalArgumentException(s"offset = $offset is not in [0, ${input.length}]") + def toCaret(offset: Int): Option[Caret] = - if (offset < 0 || offset > input.length) None - else Some(toCaretUnsafe(offset)) + if (isValidOffset(offset)) Some(toCaretUnsafeImpl(offset)) + else None /** return the line without a newline */