diff --git a/build.sbt b/build.sbt index ab5bcd5f3..ef9901167 100644 --- a/build.sbt +++ b/build.sbt @@ -90,7 +90,8 @@ val commonSettings = List( } .toList .flatten, - testFrameworks += new TestFramework("weaver.framework.CatsEffect") + testFrameworks += new TestFramework("weaver.framework.CatsEffect"), + tlBspCrossProjectPlatforms := Set(JVMPlatform) ) val root = tlCrossRootProject @@ -230,12 +231,29 @@ lazy val json = crossProject(JVMPlatform, JSPlatform, NativePlatform) name := "fs2-data-json", description := "Streaming JSON manipulation library", libraryDependencies ++= List( - "org.typelevel" %%% "literally" % literallyVersion + "org.typelevel" %%% "literally" % literallyVersion, + "org.typelevel" %%% "cats-parse" % "0.3.9" ) ++ PartialFunction .condOpt(CrossVersion.partialVersion(scalaVersion.value)) { case Some((2, _)) => "org.scala-lang" % "scala-reflect" % scalaVersion.value } - .toList + .toList, + mimaBinaryIssueFilters ++= List( + // all these experimental classes have been made internal + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.JsonTagger"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.JsonTagger$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$EndArrayElement$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$EndObjectValue$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$Raw"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$Raw$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$StartArrayElement"), + 
ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$StartArrayElement$"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$StartObjectValue"), + ProblemFilters.exclude[MissingClassProblem]("fs2.data.json.jsonpath.internals.TaggedJson$StartObjectValue$"), + ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.json.jsonpath.package.untag") + ) ) .nativeSettings( tlVersionIntroduced := Map("3" -> "1.5.1", "2.13" -> "1.5.1", "2.12" -> "1.5.1") @@ -416,36 +434,7 @@ lazy val finiteState = crossProject(JVMPlatform, JSPlatform, NativePlatform) .settings( name := "fs2-data-finite-state", description := "Streaming finite state machines", - tlVersionIntroduced := Map("3" -> "1.6.0", "2.13" -> "1.6.0", "2.12" -> "1.6.0"), - mimaBinaryIssueFilters ++= List( - // all filters related to esp.Rhs.Captured* come from converting it from case class to case object - ProblemFilters.exclude[MissingClassProblem]("fs2.data.esp.Rhs$CapturedLeaf"), - ProblemFilters.exclude[MissingTypesProblem]("fs2.data.esp.Rhs$CapturedLeaf$"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedLeaf.apply"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedLeaf.unapply"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.name"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.copy"), - ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedTree.copy$default$1"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.copy$default$2"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.this"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.apply"), - ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedLeaf.fromProduct"), - 
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedTree._1"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree._2"), - ProblemFilters.exclude[ReversedMissingMethodProblem]( - "fs2.data.mft.MFTBuilder#Guardable.fs2$data$mft$MFTBuilder$Guardable$$$outer"), - // rules now only have number of parameters - ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.apply"), - ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.mft.Rules.params"), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.copy"), - ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.mft.Rules.copy$default$1"), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.this"), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.apply"), - ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.mft.Rules._1"), - // Removal of experimental class - ProblemFilters.exclude[MissingFieldProblem]("fs2.data.esp.Tag.True"), - ProblemFilters.exclude[MissingClassProblem]("fs2.data.esp.Tag$True$") - ) + tlMimaPreviousVersions := Set.empty // experimental module, no compatibility guarantees ) .jsSettings( scalaJSLinkerConfig ~= (_.withModuleKind(ModuleKind.CommonJSModule)) diff --git a/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala b/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala index cd9f5c6b8..154dcb10e 100644 --- a/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala +++ b/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala @@ -35,4 +35,8 @@ object Query { case class Leaf[Tag, Path](tag: Tag) extends Query[Tag, Path] case class Sequence[Tag, Path](elements: NonEmptyList[Query[Tag, Path]]) extends Query[Tag, Path] case class LeafFunction[Tag, Path](f: Tag => Either[String, Tag]) extends Query[Tag, Path] + + def empty[Tag, Path]: Query[Tag, Path] = Empty() + def variable[Tag, 
Path](v: String): Query[Tag, Path] = Variable(v) + def node[Tag, Path](tag: Tag, child: Query[Tag, Path]): Query[Tag, Path] = Node(tag, child) } diff --git a/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala b/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala index a3cbcebfa..2979a0a9f 100644 --- a/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala +++ b/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala @@ -28,12 +28,14 @@ import cats.data.NonEmptyList * The compiler is based on the approach described in [[https://doi.org/10.1109/ICDE.2014.6816714 _XQuery Streaming by Forest Transducers_]] * and generalized for the abstract query language on trees. */ -private[fs2] abstract class QueryCompiler[Tag, Path] { +private[fs2] abstract class QueryCompiler[InTag, OutTag, Path] { type Matcher type Pattern type Guard + protected val emitSelected: Boolean = true + /** A single char to be matched in a path */ type Char @@ -56,7 +58,7 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { def cases(matcher: Matcher): List[(Pattern, List[Guard])] /** Return the constructor tag of this pattern, or `None` if it is a wildcard. */ - def tagOf(pattern: Pattern): Option[Tag] + def tagOf(pattern: Pattern): Option[InTag] /** Compiles the `query` into an [[MFT Macro Forest Transducer]]. * The `credit` parameter defines the maximum number of optimization passes that @@ -67,8 +69,8 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { * * If you do not want to perform any optimization, you can set this value to `0`. 
*/ - def compile(query: Query[Tag, Path], credit: Int = 50): MFT[NonEmptyList[Guard], Tag, Tag] = { - val mft = dsl[NonEmptyList[Guard], Tag, Tag] { implicit builder => + def compile(query: Query[OutTag, Path], credit: Int = 50): MFT[NonEmptyList[Guard], InTag, OutTag] = { + val mft = dsl[NonEmptyList[Guard], InTag, OutTag] { implicit builder => val q0 = state(args = 0, initial = true) val qinit = state(args = 1) val qcopy = state(args = 0) @@ -122,9 +124,12 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { val pat: builder.Guardable = tagOf(pattern).fold(anyNode)(aNode(_)) if (!finalTgt) { q1(pat.when(guard)) -> q2(x1, copyArgs: _*) ~ q1(x2, copyArgs: _*) - } else { + } else if (emitSelected) { q1(pat.when(guard)) -> end(x1, (copyArgs :+ copy(qcopy(x1))): _*) ~ q2(x1, copyArgs: _*) ~ q1(x2, copyArgs: _*) + } else { + q1(pat.when(guard)) -> end(x1, (copyArgs :+ qcopy(x1)): _*) ~ q2(x1, copyArgs: _*) ~ + q1(x2, copyArgs: _*) } states1 } @@ -134,7 +139,7 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { } } - def translate(query: Query[Tag, Path], vars: List[String], q: builder.StateBuilder): Unit = + def translate(query: Query[OutTag, Path], vars: List[String], q: builder.StateBuilder): Unit = query match { case Query.Empty() => q(any) -> eps @@ -192,7 +197,7 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { // compile and sequence every query in the sequence val rhs = - queries.foldLeft[Rhs[Tag]](eps) { (acc, query) => + queries.foldLeft[Rhs[OutTag]](eps) { (acc, query) => val q1 = state(args = q.nargs) // translate the query @@ -211,7 +216,7 @@ private[fs2] abstract class QueryCompiler[Tag, Path] { translate(query, List("$input"), qinit) } // apply some optimizations until nothing changes or credit is exhausted - def optimize(mft: MFT[NonEmptyList[Guard], Tag, Tag], credit: Int): MFT[NonEmptyList[Guard], Tag, Tag] = + def optimize(mft: MFT[NonEmptyList[Guard], InTag, OutTag], credit: Int): MFT[NonEmptyList[Guard], InTag, OutTag] = if 
(credit > 0) { val mft1 = mft.removeUnusedParameters.inlineStayMoves.removeUnreachableStates if (mft1.rules == mft.rules) diff --git a/finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala b/finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala index 9f1e9f314..b9f709e12 100644 --- a/finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala +++ b/finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala @@ -20,7 +20,7 @@ package fs2.data.pfsa trait Pred[P, Elt] { /** Whether the element `e` satisfies the predicate. */ - def satsifies(p: P)(e: Elt): Boolean + def satisfies(p: P)(e: Elt): Boolean /** The predicate that is always true. */ def always: P @@ -56,7 +56,7 @@ object Pred { implicit class PredOps[P](val p1: P) extends AnyVal { def satisfies[Elt](e: Elt)(implicit P: Pred[P, Elt]): Boolean = - P.satsifies(p1)(e) + P.satisfies(p1)(e) def &&[Elt](p2: P)(implicit P: Pred[P, Elt]): P = P.and(p1, p2) diff --git a/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala b/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala index aad37fdd9..a8d2d2b58 100644 --- a/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala +++ b/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala @@ -40,6 +40,9 @@ sealed abstract class Regular[CharSet] { Regular.empty } + def ?(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] = + this || Regular.empty + def &&(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] = (this, that) match { case (Regular.And(re1, re2), _) => re1 && (re2 && that) @@ -116,7 +119,7 @@ sealed abstract class Regular[CharSet] { def derive[C](c: C)(implicit CharSet: Pred[CharSet, C], eq: Eq[CharSet]): Regular[CharSet] = this match { case Regular.Epsilon() => Regular.Chars(CharSet.never) - case Regular.Chars(set) if CharSet.satsifies(set)(c) => Regular.Epsilon() + case Regular.Chars(set) if CharSet.satisfies(set)(c) => Regular.Epsilon() case 
Regular.Chars(_) => Regular.Chars(CharSet.never) case Regular.Concatenation(re1, re2) if re1.acceptEpsilon => (re1.derive(c) ~ re2) || re2.derive(c) @@ -184,7 +187,8 @@ sealed abstract class Regular[CharSet] { transitions: Map[Int, List[(CharSet, Int)]], re: Regular[CharSet]): (Chain[Regular[CharSet]], Map[Int, List[(CharSet, Int)]]) = { val q = qs.size.toInt - 1 - re.classes.foldLeft((qs, transitions)) { case ((qs, transitions), cs) => + val cls = re.classes + cls.foldLeft((qs, transitions)) { case ((qs, transitions), cs) => goto(re, q, cs, qs, transitions) } } @@ -231,7 +235,7 @@ object Regular { implicit def pred[CharSet: Eq, C](implicit CharSet: Pred[CharSet, C]): Pred[Regular[CharSet], C] = new Pred[Regular[CharSet], C] { - override def satsifies(p: Regular[CharSet])(e: C): Boolean = + override def satisfies(p: Regular[CharSet])(e: C): Boolean = p match { case Epsilon() => false case Chars(set) => set.satisfies(e) diff --git a/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala b/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala index ae100f827..4c4f3cc2a 100644 --- a/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala +++ b/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala @@ -57,7 +57,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { } - object MiniXQueryCompiler extends QueryCompiler[String, MiniXPath] { + object MiniXQueryCompiler extends QueryCompiler[String, String, MiniXPath] { type Matcher = Set[String] type Char = String @@ -66,7 +66,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { override implicit object predicate extends Pred[Matcher, Char] { - override def satsifies(p: Matcher)(e: Char): Boolean = p.contains(e) + override def satisfies(p: Matcher)(e: Char): Boolean = p.contains(e) override val always: Matcher = Set("a", "b", "c", "d", "doc") diff --git a/finite-state/shared/src/test/scala/fs2/data/pfsa/RegularSpec.scala 
b/finite-state/shared/src/test/scala/fs2/data/pfsa/RegularSpec.scala index fa98d1c70..9a859a765 100644 --- a/finite-state/shared/src/test/scala/fs2/data/pfsa/RegularSpec.scala +++ b/finite-state/shared/src/test/scala/fs2/data/pfsa/RegularSpec.scala @@ -39,7 +39,7 @@ object RegularSpec extends SimpleIOSuite with Checkers { implicit object CharSetInstances extends Pred[Set[Char], Char] with Candidate[Set[Char], Char] { - override def satsifies(p: Set[Char])(e: Char): Boolean = p.contains(e) + override def satisfies(p: Set[Char])(e: Char): Boolean = p.contains(e) override val always: Set[Char] = Set('a', 'b') diff --git a/json/src/main/scala-2/fs2/data/json/jq/literals.scala b/json/src/main/scala-2/fs2/data/json/jq/literals.scala new file mode 100644 index 000000000..638cc311f --- /dev/null +++ b/json/src/main/scala-2/fs2/data/json/jq/literals.scala @@ -0,0 +1,81 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package json +package jq + +import cats.data.NonEmptyList +import cats.syntax.all._ +import org.typelevel.literally.Literally + +import scala.annotation.unused +import scala.reflect.macros.blackbox.Context + +object literals { + + implicit class JqStringContext(val sc: StringContext) extends AnyVal { + def jq(args: Any*): Jq = macro JqInterpolator.make + } + + trait LiftableImpls { + val c: Context + import c.universe._ + + implicit def nel[T](implicit @unused T: Liftable[T]): Liftable[NonEmptyList[T]] = Liftable[NonEmptyList[T]] { + case NonEmptyList(t, Nil) => q"_root_.cats.data.NonEmptyList.one($t)" + case NonEmptyList(t, tail) => q"_root_.cats.data.NonEmptyList($t, $tail)" + } + + implicit lazy val jqLiftable: Liftable[Jq] = Liftable[Jq] { + case Jq.Root => q"_root_.fs2.data.json.jq.Jq.Root" + case Jq.Identity => q"_root_.fs2.data.json.jq.Jq.Identity" + case Jq.Field(name) => q"_root_.fs2.data.json.jq.Jq.Field($name)" + case Jq.Index(idx) => q"_root_.fs2.data.json.jq.Jq.Index($idx)" + case Jq.Slice(idx1, idx2) => q"_root_.fs2.data.json.jq.Jq.Slice($idx1, $idx2)" + case Jq.Child => q"_root_.fs2.data.json.jq.Jq.Child" + case Jq.RecursiveDescent => q"_root_.fs2.data.json.jq.Jq.RecursiveDescent" + case Jq.Sequence(qs) => + q"_root_.fs2.data.json.jq.Jq.Sequence(_root_.cats.data.NonEmptyChain.fromNonEmptyList(${qs.toNonEmptyList.widen[Jq]}))" + case Jq.Iterator(filter, inner) => q"_root_.fs2.data.json.jq.Jq.Iterator(${filter: Jq}, $inner)" + case Jq.Arr(pfx, qs) => q"_root_.fs2.data.json.jq.Jq.Arr(${pfx: Jq}, $qs)" + case Jq.Obj(pfx, qs) => q"_root_.fs2.data.json.jq.Jq.Obj(${pfx: Jq}, $qs)" + case Jq.Num(n) => q"_root_.fs2.data.json.jq.Jq.Num($n)" + case Jq.Str(s) => q"_root_.fs2.data.json.jq.Jq.Str($s)" + case Jq.Bool(b) => q"_root_.fs2.data.json.jq.Jq.Bool($b)" + case Jq.Null => q"_root_.fs2.data.json.jq.Jq.Null" + } + } + + object JqInterpolator extends Literally[Jq] { + + def validate(ctx: Context)(string: String): 
Either[String, ctx.Expr[Jq]] = { + import ctx.universe._ + val liftables = new LiftableImpls { + val c: ctx.type = ctx + } + import liftables._ + JqParser + .either(string) + .leftMap(_.getMessage) + .map(p => c.Expr(q"$p")) + } + + def make(c: Context)(args: c.Expr[Any]*): c.Expr[Jq] = apply(c)(args: _*) + + } +} diff --git a/json/src/main/scala-3/fs2/data/json/jq/literals.scala b/json/src/main/scala-3/fs2/data/json/jq/literals.scala new file mode 100644 index 000000000..c620588d6 --- /dev/null +++ b/json/src/main/scala-3/fs2/data/json/jq/literals.scala @@ -0,0 +1,90 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package json +package jq + +import cats.syntax.all.* +import cats.data.{NonEmptyChain, NonEmptyList} + +import scala.quoted.* + +import org.typelevel.literally.Literally + +import scala.language.experimental.macros + +package object literals { + + extension (inline ctx: StringContext) { + inline def jq(inline args: Any*): Jq = ${ JqInterpolator('ctx, 'args) } + } + + given [T](using ToExpr[T], Type[T]): ToExpr[NonEmptyList[T]] with { + def apply(nel: NonEmptyList[T])(using Quotes) = nel match { + case NonEmptyList(t, Nil) => '{ NonEmptyList.one(${ Expr(t) }) } + case NonEmptyList(t, tail) => '{ NonEmptyList(${ Expr(t) }, ${ Expr(tail) }) } + } + } + + given ToExpr[SimpleFilter] with { + def apply(f: SimpleFilter)(using Quotes) = + f match { + case Jq.Root => '{ Jq.Root } + case Jq.Identity => '{ Jq.Identity } + case Jq.Field(name) => '{ Jq.Field(${ Expr(name) }) } + case Jq.Index(idx) => '{ Jq.Index(${ Expr(idx) }) } + case Jq.Slice(idx1, idx2) => '{ Jq.Slice(${ Expr(idx1) }, ${ Expr(idx2) }) } + case Jq.Child => '{ Jq.Child } + case Jq.RecursiveDescent => '{ Jq.RecursiveDescent } + } + } + + given ToExpr[Filter] with { + def apply(f: Filter)(using Quotes) = + f match { + case Jq.Sequence(qs) => + '{ Jq.Sequence(NonEmptyChain.fromNonEmptyList(${ Expr(qs.toNonEmptyList) })) } + case simple: SimpleFilter => Expr(simple) + } + } + + given ToExpr[Jq] with { + def apply(q: Jq)(using Quotes) = + q match { + case f: Filter => Expr(f) + case Jq.Iterator(filter, inner) => '{ Jq.Iterator(${ Expr(filter) }, ${ Expr(inner) }) } + case Jq.Arr(pfx, qs) => '{ Jq.Arr(${ Expr(pfx) }, ${ Expr(qs) }) } + case Jq.Obj(pfx, qs) => '{ Jq.Obj(${ Expr(pfx) }, ${ Expr(qs) }) } + case Jq.Num(n) => '{ Jq.Num(${ Expr(n) }) } + case Jq.Str(s) => '{ Jq.Str(${ Expr(s) }) } + case Jq.Bool(b) => '{ Jq.Bool(${ Expr(b) }) } + case Jq.Null => '{ Jq.Null } + } + } + + object JqInterpolator extends Literally[Jq] { + + def validate(string: String)(using Quotes) = { + 
JqParser + .either(string) + .leftMap(_.getMessage) + .map(Expr(_)) + } + + } +} diff --git a/json/src/main/scala/fs2/data/json/jq/CompiledJq.scala b/json/src/main/scala/fs2/data/json/jq/CompiledJq.scala new file mode 100644 index 000000000..1c13919ba --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/CompiledJq.scala @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package jq + +/** A pipe running the compiled jq query on the input stream of JSON tokens. */ +trait CompiledJq[F[_]] extends Pipe[F, Token, Token] { + + /** Allows for piping `this` compiled jq query feeding its result to `that` compiled jq query. */ + def andThen(that: CompiledJq[F]): CompiledJq[F] + + /** Alias for `andThen`. */ + def |(that: CompiledJq[F]): CompiledJq[F] = andThen(that) + +} diff --git a/json/src/main/scala/fs2/data/json/jq/Compiler.scala b/json/src/main/scala/fs2/data/json/jq/Compiler.scala new file mode 100644 index 000000000..9d43ac43f --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/Compiler.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package jq + +import cats.{Defer, MonadThrow} + +import internal.ESPJqCompiler + +case class JqException(msg: String) extends Exception(msg) + +/** A compiler for jq queries into some compiled form. */ +trait Compiler[F[_]] { + + def compile(jq: Jq): F[CompiledJq[F]] + +} + +object Compiler { + + def apply[F[_]: MonadThrow: Defer]: Compiler[F] = + new ESPJqCompiler[F] + +} diff --git a/json/src/main/scala/fs2/data/json/jq/JqParser.scala b/json/src/main/scala/fs2/data/json/jq/JqParser.scala new file mode 100644 index 000000000..973113b54 --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/JqParser.scala @@ -0,0 +1,168 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2.data.json.jq + +import cats.data.NonEmptyList +import cats.parse.{Accumulator0, Appender, Numbers, Parser => P, Parser0} +import cats.syntax.all._ +import cats.{MonadThrow, Monoid} + +case class JqParserException(error: P.Error) extends Exception(error.show) + +object JqParser { + + implicit def accForMonoid[T: Monoid]: Accumulator0[T, T] = new Accumulator0[T, T] { + override def newAppender(): Appender[T, T] = new Appender[T, T] { + private[this] var res = Monoid[T].empty + override def append(item: T) = { + res = res.combine(item) + this + } + override def finish(): T = res + } + + } + + private val whitespace: P[Char] = P.charIn(" \t\r\n") + private val whitespace0: Parser0[Unit] = whitespace.rep0.void + + private val identifierChar: P[Unit] = + P.charIn(('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') ++ "-_").void + private val identifier: P[String] = + (P.charIn(('a' to 'z') ++ ('A' to 'Z')) ~ identifierChar.rep0) + .withContext("identifier") + .string <* whitespace0 + + private def kw(kw: String): P[Unit] = + (P.string(kw) ~ !identifierChar).void <* whitespace0 + + private val kwTrue: P[Unit] = kw("true") + private val kwFalse: P[Unit] = kw("false") + private val kwNull: P[Unit] = kw("null") + + private def ch(c: Char): P[Unit] = + P.char(c) <* whitespace0 + + private def str(s: String): P[Unit] = + P.string(s) <* whitespace0 + + private val string: P[String] = + P.char('"') *> P + .oneOf( + P.charsWhile(c => c != '"' && c != '\\') :: + (P.char('\\') *> P.fromCharMap(Map('"' -> "\"", '\\' -> "\\"))) :: + Nil) + .repAs0[String] <* ch('"') + + private val index: P[Int] = + Numbers.nonNegativeIntString.mapFilter(s => Either.catchNonFatal(s.toInt).toOption) <* whitespace0 + + private val filter: P[Filter] = { + val access: P[Filter] = + P.oneOf( + string.map(Jq.Field(_)) :: + (index ~ (ch(':') *> index.?).?) 
+ .collect { + case (idx, None) => + Jq.Index(idx) + case (idx1, Some(Some(idx2))) if idx1 == idx2 => + Jq.Index(idx1) + case (min, Some(max)) if max.forall(min < _) => + Jq.Slice(min, max) + } :: + (ch(':') *> index.map(max => Jq.Slice(0, Some(max)))) :: + Nil) + .between(ch('['), ch(']')) + .withContext("string, index, slice 'min:max' (with min <= max), slice 'idx:', or slice ':idx'") + + val step: P[Filter] = + (ch('.') *> P + .oneOf( + identifier.map(Jq.Field(_)) :: + access.backtrack :: + Nil) + .? ~ access.backtrack.repAs0[Filter]) + .map { + case (Some(fst), snd) => fst ~ snd + case (None, access) => Jq.Identity ~ access + } + .repAs[Filter] + + P.oneOf( + (str("..") *> (access.backtrack ~ step.repAs0[Filter]).?).map { + case Some((access, rest)) => Jq.RecursiveDescent ~ access ~ rest + case None => Jq.RecursiveDescent + } :: + step :: + Nil) + // repSepAs would be great here + .repSep(ch('|')) + .map { + case NonEmptyList(filter, Nil) => filter + case steps => steps.reduceLeft(_ ~ _) + } + } + + private val selector: P[(Filter, Jq => Jq)] = P.recursive[(Filter, Jq => Jq)] { selector => + (filter ~ (str("[]") *> selector.?).?).map { + case (prefix, None) => + (prefix, identity) + case (prefix, Some(None)) => + (Jq.Identity, Jq.Iterator(prefix, _)) + case (prefix1, Some(Some((prefix2, f)))) => + (prefix2, inner => Jq.Iterator(prefix1, f(inner))) + } + } + + private val query: P[Jq] = P.recursive[Jq] { query => + val constructor: P[Filter => Constructor] = + P.oneOf( + query + .repSep0(ch(',')) + .with1 + .between(ch('['), ch(']')) + .map[Filter => Constructor](fs => prefix => Jq.Arr(prefix, fs)) :: + (string ~ (ch(':') *> query)) + .repSep0(ch(',')) + .with1 + .between(ch('{'), ch('}')) + .map[Filter => Constructor](fs => prefix => Jq.Obj(prefix, fs)) :: + string.map[Filter => Constructor](s => _ => Jq.Str(s)) :: + kwTrue.as[Filter => Constructor](_ => Jq.Bool(true)) :: + kwFalse.as[Filter => Constructor](_ => Jq.Bool(false)) :: + kwNull.as[Filter => 
Constructor](_ => Jq.Null) :: + Numbers.jsonNumber.map[Filter => Constructor](n => _ => Jq.Num(n)) :: + Nil) + + whitespace0.with1 *> + P.oneOf( + (selector ~ (ch('|') *> constructor).?).map { + case ((filter, f), Some(cst)) => + f(cst(filter)) + case ((filter, f), None) => f(filter) + } :: + constructor.map(cst => cst(Jq.Identity)) :: + Nil) + } + + def parse[F[_]](input: String)(implicit F: MonadThrow[F]): F[Jq] = + either(input).liftTo[F] + + def either(input: String): Either[Throwable, Jq] = + (query <* P.end).parseAll(input).leftMap(JqParserException(_)) + +} diff --git a/json/src/main/scala/fs2/data/json/jq/PipedCompiledJq.scala b/json/src/main/scala/fs2/data/json/jq/PipedCompiledJq.scala new file mode 100644 index 000000000..44c43b22e --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/PipedCompiledJq.scala @@ -0,0 +1,38 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package jq + +import cats.data.NonEmptyChain + +/** Represents a sequence of piped compiled jq queries, applying every query to the result of the previous one. 
*/ +class PipedCompiledJq[F[_]](val jqs: NonEmptyChain[CompiledJq[F]]) extends CompiledJq[F] { + + override def apply(in: fs2.Stream[F, Token]): fs2.Stream[F, Token] = + jqs.foldLeft(in)((base, jq) => base.through(jq)) + + override def andThen(that: CompiledJq[F]): CompiledJq[F] = + that match { + case that: PipedCompiledJq[F] => + new PipedCompiledJq[F](this.jqs ++ that.jqs) + case _ => + new PipedCompiledJq[F](this.jqs :+ that) + } + +} diff --git a/json/src/main/scala/fs2/data/json/jq/TaggedMatcher.scala b/json/src/main/scala/fs2/data/json/jq/TaggedMatcher.scala new file mode 100644 index 000000000..33504261e --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/TaggedMatcher.scala @@ -0,0 +1,194 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2.data +package json +package jq + +import cats.data.NonEmptyList +import cats.syntax.all._ +import cats.{Eq, Show} + +import pfsa._ +import tagged._ +import Pred.syntax._ + +private sealed trait TaggedMatcher { + def dnf: NonEmptyList[NonEmptyList[AtomTaggedMatcher]] = + this match { + case TaggedMatcher.AllOf(clauses) => + clauses.toList match { + case Nil => NonEmptyList.one(NonEmptyList.one(TaggedMatcher.Fail)) + case c :: Nil => c.dnf + case c :: rest => + val dnf1 = c.dnf + val dnf2 = TaggedMatcher.AllOf(rest.toSet).dnf + dnf1.flatMap(conj1 => dnf2.map(conj2 => conj1.concatNel(conj2))) + } + case TaggedMatcher.AnyOf(clauses) => + NonEmptyList + .fromList(clauses.toList) + .getOrElse(NonEmptyList.one(TaggedMatcher.Fail)) + .flatMap(_.dnf) + case atom: AtomTaggedMatcher => + NonEmptyList.one(NonEmptyList.one(atom)) + } +} +private sealed trait AtomTaggedMatcher extends TaggedMatcher +private sealed trait PatternTaggedMatcher extends AtomTaggedMatcher +private sealed trait GuardTaggedMatcher extends AtomTaggedMatcher +private sealed trait NegatableTaggedMatcher extends AtomTaggedMatcher +private object TaggedMatcher { + + case object StartJson extends PatternTaggedMatcher with NegatableTaggedMatcher + case object StartObject extends PatternTaggedMatcher with NegatableTaggedMatcher + case object StartArray extends PatternTaggedMatcher with NegatableTaggedMatcher + + case class Field(name: String) extends PatternTaggedMatcher with NegatableTaggedMatcher + case class Index(idx: Int) extends PatternTaggedMatcher with NegatableTaggedMatcher + case class Slice(start: Int, end: Option[Int]) extends GuardTaggedMatcher with NegatableTaggedMatcher + case object Any extends PatternTaggedMatcher + case object Fail extends AtomTaggedMatcher + + case class AnyOf(m: Set[TaggedMatcher]) extends TaggedMatcher + case class AllOf(m: Set[TaggedMatcher]) extends TaggedMatcher + case class Not(p: NegatableTaggedMatcher) extends GuardTaggedMatcher + + implicit object 
TaggedInstances extends Pred[TaggedMatcher, TaggedJson] with Candidate[TaggedMatcher, TaggedJson] { + + override def pick(set: TaggedMatcher): Option[TaggedJson] = + set match { + case StartJson => Some(TaggedJson.StartJson) + case StartObject => Some(TaggedJson.Raw(Token.StartObject)) + case StartArray => Some(TaggedJson.Raw(Token.StartArray)) + case Field(name) => Some(TaggedJson.StartObjectValue(name)) + case Index(idx) => Some(TaggedJson.StartArrayElement(idx)) + case Slice(start, _) => Some(TaggedJson.StartArrayElement(start)) + case Any => Some(TaggedJson.Raw(Token.StartObject)) + case Fail => None + case AnyOf(m) => m.headOption.flatMap(pick(_)) + case AllOf(m) => m.headOption.flatMap(pick(_)) + case Not(StartJson) => Some(TaggedJson.Raw(Token.StartArray)) + case Not(StartObject) => Some(TaggedJson.Raw(Token.StartArray)) + case Not(StartArray) => Some(TaggedJson.Raw(Token.StartObject)) + case Not(Field(name)) => Some(TaggedJson.StartObjectValue(s"!$name")) + case Not(Index(idx)) => Some(TaggedJson.StartArrayElement(idx + 1)) + case Not(Slice(start, _)) => Some(TaggedJson.StartArrayElement(start - 1)) + } + + override def satisfies(p: TaggedMatcher)(e: TaggedJson): Boolean = + (p, e) match { + case (StartJson, TaggedJson.StartJson) => true + case (StartObject, TaggedJson.Raw(Token.StartObject)) => true + case (StartArray, TaggedJson.Raw(Token.StartArray)) => true + case (Field(name1), TaggedJson.StartObjectValue(name2)) => name1 === name2 + case (Index(idx1), TaggedJson.StartArrayElement(idx2)) => idx1 === idx2 + case (Slice(start, end), TaggedJson.StartArrayElement(idx)) => idx >= start && end.forall(idx < _) + case (Any, _) => true + case (Fail, _) => false + case (AnyOf(ps), _) => ps.exists(satisfies(_)(e)) + case (AllOf(ps), _) => ps.forall(satisfies(_)(e)) + case (Not(p), _) => !satisfies(p)(e) + case (_, _) => false + } + + override def always: TaggedMatcher = Any + + override def never: TaggedMatcher = Fail + + override def and(p1: TaggedMatcher, p2: 
TaggedMatcher): TaggedMatcher = + (p1, p2) match { + case (Any, _) => p2 + case (_, Any) => p1 + case (Fail, _) => Fail + case (_, Fail) => Fail + case (AllOf(ps1), AllOf(ps2)) => AllOf(ps1 ++ ps2) + case (AllOf(ps1), _) => AllOf(ps1 + p2) + case (_, AllOf(ps2)) => AllOf(ps2 + p1) + case (StartObject, StartObject) => StartObject + case (StartObject, _) => Fail + case (_, StartObject) => Fail + case (StartArray, StartArray) => StartArray + case (StartArray, _) => Fail + case (_, StartArray) => Fail + case (Field(f1), Field(f2)) => if (f1 === f2) p1 else Fail + case (Field(_), Index(_) | Slice(_, _)) => Fail + case (Index(_) | Slice(_, _), Field(_)) => Fail + case (Index(idx1), Index(idx2)) => if (idx1 === idx2) p1 else Fail + case (Index(idx), Slice(start, end)) => if (idx >= start && end.forall(idx < _)) p1 else Fail + case (Slice(start, end), Index(idx)) => if (idx >= start && end.forall(idx < _)) p1 else Fail + case (_, _) => if (p1 === p2) p1 else AllOf(Set(p1, p2)) + } + + override def or(p1: TaggedMatcher, p2: TaggedMatcher): TaggedMatcher = + (p1, p2) match { + case (AnyOf(ps1), AnyOf(ps2)) => AnyOf(ps1 ++ ps2) + case (Any, _) => Any + case (_, Any) => Any + case (Fail, _) => p2 + case (_, Fail) => p1 + case (AnyOf(ps1), _) => AnyOf(ps1 + p2) + case (_, AnyOf(ps2)) => AnyOf(ps2 + p1) + case (Index(idx), Slice(start, end)) if idx >= start && end.forall(idx < _) => p2 + case (Slice(start, end), Index(idx)) if idx >= start && end.forall(idx < _) => p1 + case (_, _) => if (p1 === p2) p1 else AnyOf(Set(p1, p2)) + } + + override def not(p: TaggedMatcher): TaggedMatcher = + p match { + case Not(p) => p + case Fail => Any + case Any => Fail + case AllOf(ps) => ps.fold(never)((acc, p) => or(acc, not(p))) + case AnyOf(ps) => ps.fold(always)((acc, p) => and(acc, not(p))) + case p: NegatableTaggedMatcher => Not(p) + } + + override def isSatisfiable(p: TaggedMatcher): Boolean = + p match { + case Fail => false + case AnyOf(cases) => cases.exists(_.isSatisfiable) + case 
AllOf(cases) => cases.forall(_.isSatisfiable) + case _ => true + } + + } + + implicit val eq: Eq[TaggedMatcher] = Eq.fromUniversalEquals + +} + +private object NegatableTaggedMatcher { + + implicit val show: Show[NegatableTaggedMatcher] = { + case TaggedMatcher.StartJson => ". != $" + case TaggedMatcher.StartObject => ". != {" + case TaggedMatcher.StartArray => ". != [" + case TaggedMatcher.Field(name) => show". != {$name}" + case TaggedMatcher.Index(idx) => show". != [$idx]" + case TaggedMatcher.Slice(start, Some(end)) => show". not in [$start..$end]" + case TaggedMatcher.Slice(start, None) => show". != [$start]" + } + +} + +private object GuardTaggedMatcher { + implicit val show: Show[GuardTaggedMatcher] = { + case TaggedMatcher.Not(p) => show"$p" + case TaggedMatcher.Slice(start, Some(end)) => show". in [$start..$end]" + case TaggedMatcher.Slice(start, None) => show". == [$start]" + } +} diff --git a/json/src/main/scala/fs2/data/json/jq/ast.scala b/json/src/main/scala/fs2/data/json/jq/ast.scala new file mode 100644 index 000000000..7556678ca --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/ast.scala @@ -0,0 +1,66 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2.data.json.jq + +import cats.data.NonEmptyChain +import cats.kernel.Monoid + +sealed trait Jq +sealed trait Filter extends Jq { + def ~(that: Filter): Filter = + (this, that) match { + case (Jq.Identity, _) => that + case (_, Jq.Identity) => this + case (Jq.Sequence(s1), Jq.Sequence(s2)) => Jq.Sequence(s1 ++ s2) + case (Jq.Sequence(s1), that: SimpleFilter) => Jq.Sequence(s1 :+ that) + case (f: SimpleFilter, Jq.Sequence(s2)) => Jq.Sequence(f +: s2) + case (f1: SimpleFilter, f2: SimpleFilter) => Jq.Sequence(NonEmptyChain(f1, f2)) + } + +} + +object Filter { + implicit object monoid extends Monoid[Filter] { + override def combine(x: Filter, y: Filter): Filter = x ~ y + override def empty: Filter = Jq.Identity + } +} +sealed trait SimpleFilter extends Filter + +sealed trait Constructor extends Jq +object Jq { + // filters + case object Root extends SimpleFilter + case object Identity extends SimpleFilter + final case class Field(name: String) extends SimpleFilter + final case class Index(idx: Int) extends SimpleFilter + final case class Slice(start: Int, end: Option[Int]) extends SimpleFilter + case object RecursiveDescent extends SimpleFilter + private[jq] case object Child extends SimpleFilter + final case class Sequence(jqs: NonEmptyChain[SimpleFilter]) extends Filter + + final case class Iterator(filter: Filter, inner: Jq) extends Jq + + // constructors + final case class Arr(prefix: Filter, values: List[Jq]) extends Constructor + final case class Obj(prefix: Filter, fields: List[(String, Jq)]) extends Constructor + final case class Num(n: String) extends Constructor + final case class Str(s: String) extends Constructor + final case class Bool(b: Boolean) extends Constructor + case object Null extends Constructor + +} diff --git a/json/src/main/scala/fs2/data/json/jq/internal/ESPCompiledJq.scala b/json/src/main/scala/fs2/data/json/jq/internal/ESPCompiledJq.scala new file mode 100644 index 000000000..4737e0066 --- /dev/null +++ 
b/json/src/main/scala/fs2/data/json/jq/internal/ESPCompiledJq.scala @@ -0,0 +1,44 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package jq +package internal + +import cats.data.NonEmptyChain + +import tagged._ + +private[jq] class ESPCompiledJq[F[_]: RaiseThrowable](val esp: JqESP[F]) extends CompiledJq[F] { + + def apply(in: Stream[F, Token]): Stream[F, Token] = + in.through(JsonTagger.pipe) + .through(esp.pipe) + .map(untag(_)) + .unNone + + def andThen(that: CompiledJq[F]): CompiledJq[F] = + that match { + case that: PipedCompiledJq[F] => + new PipedCompiledJq[F](this +: that.jqs) + case _ => + // no idea how to fuse them, be naive + new PipedCompiledJq[F](NonEmptyChain(this, that)) + } + +} diff --git a/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala b/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala new file mode 100644 index 000000000..9a06c08c8 --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala @@ -0,0 +1,327 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package json +package jq +package internal + +import cats.data.{NonEmptyList, StateT} +import cats.syntax.all._ +import cats.{Defer, Eq, MonadThrow} + +import mft.query.{Query, QueryCompiler} +import tagged.TaggedJson +import pfsa._ + +private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F]) + extends QueryCompiler[TaggedJson, TaggedJson, Filter] + with Compiler[F] { + + override protected val emitSelected: Boolean = false + + private type State[T] = StateT[F, Int, T] + + private val nextIdent: State[String] = + for { + id <- StateT.get + _ <- StateT.set(id + 1) + } yield s"v$id" + + private val currentIdent: State[Option[String]] = + StateT.get.map { + case 0 => None + case n => Some(s"v${n - 1}") + } + + private def pure[T](v: T): State[T] = + StateT.pure(v) + + private def raiseError[T](exn: Throwable): State[T] = + exn.raiseError[State, T] + + type Matcher = TaggedMatcher + type Guard = GuardTaggedMatcher + type Pattern = PatternTaggedMatcher + type Char = TaggedJson + + override implicit def predicate: Pred[Matcher, Char] = TaggedMatcher.TaggedInstances + + override implicit def candidate: Candidate[Matcher, Char] = TaggedMatcher.TaggedInstances + + override implicit def charsEq: Eq[TaggedMatcher] = TaggedMatcher.eq + + override def tagOf(pattern: PatternTaggedMatcher): Option[TaggedJson] = pattern match { + case TaggedMatcher.StartJson => Some(TaggedJson.StartJson) + case TaggedMatcher.StartObject => Some(TaggedJson.Raw(Token.StartObject)) + case TaggedMatcher.StartArray => 
Some(TaggedJson.Raw(Token.StartArray)) + case TaggedMatcher.Field(name) => Some(TaggedJson.StartObjectValue(name)) + case TaggedMatcher.Index(idx) => Some(TaggedJson.StartArrayElement(idx)) + case TaggedMatcher.Any => None + } + + def path2regular(f: Filter): Regular[TaggedMatcher] = { + def loop(f: Filter): Regular[TaggedMatcher] = + f match { + case Jq.Root => + Regular.chars[TaggedMatcher](TaggedMatcher.StartJson) + case Jq.Slice(start, end) => + Regular.chars[TaggedMatcher](TaggedMatcher.StartArray) ~ Regular.chars(TaggedMatcher.Slice(start, end)) + case Jq.Index(idx) => + Regular.chars[TaggedMatcher](TaggedMatcher.StartArray) ~ Regular.chars(TaggedMatcher.Index(idx)) + case Jq.Child => + (Regular.chars[TaggedMatcher](TaggedMatcher.StartArray) || + Regular.chars[TaggedMatcher](TaggedMatcher.StartObject)) ~ Regular.any + case Jq.RecursiveDescent => + ((Regular.chars[TaggedMatcher](TaggedMatcher.StartArray) || + Regular.chars[TaggedMatcher](TaggedMatcher.StartObject)) ~ Regular.any).rep + case Jq.Field(name) => + Regular.chars[TaggedMatcher](TaggedMatcher.StartObject) ~ Regular.chars(TaggedMatcher.Field(name)) + case Jq.Identity => + Regular.epsilon + case Jq.Sequence(jqs) => + jqs.foldLeft(Regular.epsilon[TaggedMatcher])(_ ~ loop(_)) + } + loop(f) + } + + def cases(m: TaggedMatcher) = { + // first transform the matcher into DNF + m.dnf.toList + // then for each conjunctive clause, separate the + // pattern part (open tag or leaf) and the guard part + // drop the statically false clauses + .flatMap { atoms => + atoms.toList + .foldLeftM[Option, (PatternTaggedMatcher, List[GuardTaggedMatcher])]( + (TaggedMatcher.Any, List.empty[GuardTaggedMatcher])) { + case (_, TaggedMatcher.Fail) => + // fail the entire conjunction + none + case (acc, TaggedMatcher.Any) => + acc.some + case ((TaggedMatcher.Any, guard), pattern: PatternTaggedMatcher) => + // this is a finer pattern, save it + (pattern, guard).some + case ((TaggedMatcher.Any, guard), g: GuardTaggedMatcher) => + // 
some guard for sure (almost) + (TaggedMatcher.Any, g :: guard).some + case ((pat1 @ TaggedMatcher.Field(fld1), guard), TaggedMatcher.Field(fld2)) => + // check whether both are compatible, and if yes, keep the most restrictive one + (fld1, fld2) match { + case (fld1, fld2) => + if (fld1 === fld2) (pat1, guard).some + else none + } + case ((pat @ TaggedMatcher.Field(fld1), guard), TaggedMatcher.Not(TaggedMatcher.Field(fld2))) => + if (fld1 =!= fld2) + // guard is redundant + (pat, guard).some + else + // incompatible + none + case ((TaggedMatcher.Field(_), _), + TaggedMatcher.Index(_) | TaggedMatcher.Slice(_, _) | TaggedMatcher.StartObject | + TaggedMatcher.StartArray | TaggedMatcher.StartJson) => + // incompatible + none + case (acc @ (TaggedMatcher.Field(_), _), + TaggedMatcher.Not( + TaggedMatcher.Index(_) | TaggedMatcher.Slice(_, _) | TaggedMatcher.StartObject | + TaggedMatcher.StartArray | TaggedMatcher.StartJson)) => + // guard is redundant + acc.some + case ((pat1 @ TaggedMatcher.Index(idx1), guard), TaggedMatcher.Index(idx2)) => + // check whether both are compatible, and if yes, keep the most restrictive one + (idx1, idx2) match { + case (idx1, idx2) => + if (idx1 === idx2) (pat1, guard).some + else none + } + case ((pat @ TaggedMatcher.Index(idx), guard), TaggedMatcher.Slice(start, end)) => + if (idx >= start && end.forall(idx < _)) + // guard is redundant + (pat, guard).some + else + // incompatible + none + case ((pat @ TaggedMatcher.Index(idx1), guard), TaggedMatcher.Not(TaggedMatcher.Index(idx2))) => + if (idx1 =!= idx2) + // guard is redundant + (pat, guard).some + else // incompatible + none + case ((pat @ TaggedMatcher.Index(idx), guard), TaggedMatcher.Not(TaggedMatcher.Slice(start, end))) => + if (idx < start || end.exists(idx >= _)) + // guard is redundant + (pat, guard).some + else + // incompatible + none + case (acc @ (TaggedMatcher.Index(_), _), + TaggedMatcher.Not( + TaggedMatcher.Field(_) | TaggedMatcher.StartObject | 
TaggedMatcher.StartArray | + TaggedMatcher.StartJson)) => + // redundant + acc.some + case ((TaggedMatcher.Index(_), _), + TaggedMatcher.Field(_) | TaggedMatcher.StartObject | TaggedMatcher.StartArray | + TaggedMatcher.StartJson) => + // incompatible + none + case (acc @ (TaggedMatcher.StartObject, _), TaggedMatcher.StartObject) => + // redundant + acc.some + case ((TaggedMatcher.StartObject, _), TaggedMatcher.Not(TaggedMatcher.StartObject)) => + // incompatible + none + case (acc @ (TaggedMatcher.StartObject, _), TaggedMatcher.Not(_)) => + // redundant + acc.some + case ((TaggedMatcher.StartObject, _), _) => + // incompatible + none + case (acc @ (TaggedMatcher.StartArray, _), TaggedMatcher.StartArray) => + // redundant + acc.some + case ((TaggedMatcher.StartArray, _), TaggedMatcher.Not(TaggedMatcher.StartArray)) => + // incompatible + none + case (acc @ (TaggedMatcher.StartArray, _), TaggedMatcher.Not(_)) => + // redundant + acc.some + case ((TaggedMatcher.StartArray, _), _) => + // incompatible + none + case (acc @ (TaggedMatcher.StartJson, _), TaggedMatcher.StartJson) => + // redundant + acc.some + case ((TaggedMatcher.StartJson, _), TaggedMatcher.Not(TaggedMatcher.StartJson)) => + // incompatible + none + case (acc @ (TaggedMatcher.StartJson, _), TaggedMatcher.Not(_)) => + // redundant + acc.some + case ((TaggedMatcher.StartJson, _), _) => + // incompatible + none + } + } + } + + private def preprocess(prefix: Filter, jq: Jq): State[Query[TaggedJson, Filter]] = + jq match { + case Jq.Null => + pure(Query.Leaf(TaggedJson.Raw(Token.NullValue))) + case Jq.Bool(b) => + pure(Query.Leaf(TaggedJson.Raw(if (b) Token.TrueValue else Token.FalseValue))) + case Jq.Arr(prefix1, values) => + values.zipWithIndex + .traverse { case (elt, idx) => + preprocess(prefix ~ prefix1, elt).map(q => Query.Node(TaggedJson.StartArrayElement(idx), q)) + } + .map { elts => + Query.Node(TaggedJson.Raw(Token.StartArray), + NonEmptyList.fromList(elts).fold(Query.empty[TaggedJson, 
Filter])(Query.Sequence(_))) + } + case Jq.Num(n) => + pure(Query.Leaf(TaggedJson.Raw(Token.NumberValue(n.toString())))) + case Jq.Str(s) => + pure(Query.Leaf(TaggedJson.Raw(Token.StringValue(s)))) + case Jq.Obj(prefix1, fields) => + val (iterators, vs) = + fields.zipWithIndex.partitionEither { + case ((name, it @ Jq.Iterator(_, _)), idx) => Left((name, it, idx)) + case (kv, _) => Right(kv) + } + iterators match { + case Nil => + vs.traverse { case (name, elt) => + if (elt == Jq.Identity && prefix1 == Jq.Identity) + currentIdent.flatMap { + case Some(v) => + Query.node[TaggedJson, Filter](TaggedJson.StartObjectValue(name), Query.variable(v)).pure[State] + case None => + preprocess(prefix ~ prefix1, elt).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) + } + else + preprocess(prefix ~ prefix1, elt).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) + }.map { elts => + Query.Node(TaggedJson.Raw(Token.StartObject), + NonEmptyList.fromList(elts).fold(Query.empty[TaggedJson, Filter])(Query.Sequence(_))) + } + case (name, Jq.Iterator(filter, inner), idx) :: Nil => + for { + values <- vs.traverse { case (name, elt) => + for { + v <- nextIdent + q <- + if (elt == Jq.Identity) + Query.Variable[TaggedJson, Filter](v).pure[State] + else + preprocess(prefix ~ prefix1, elt) + } yield (v, Query.Node(TaggedJson.StartObjectValue(name), q)) + } + v <- nextIdent + inner <- + if (inner == Jq.Identity) + Query.Variable[TaggedJson, Filter](v).pure[State] + else + preprocess(Jq.Identity, inner) + } yield { + val (before, after) = values.splitAt(idx) + val forClause: Query[TaggedJson, Filter] = + Query.ForClause( + v, + prefix ~ prefix1 ~ filter ~ Jq.Child, + Query.Node( + TaggedJson.Raw(Token.StartObject), + Query.Sequence( + NonEmptyList[Query[TaggedJson, Filter]](Query.Node(TaggedJson.StartObjectValue(name), inner), + after.map(kv => + Query.Variable[TaggedJson, Filter](kv._1))) + .prependList(before.map(kv => Query.Variable[TaggedJson, Filter](kv._1)))) + ) + ) + 
values.foldLeft(forClause) { case (inner, (v, q)) => Query.LetClause(v, q, inner) } + } + case _ => + raiseError( + JqException(s"object constructors may have only one iterator element, but got ${iterators.size}")) + } + case Jq.Iterator(filter, inner: Constructor) => + for { + v <- nextIdent + inner <- preprocess(Jq.Identity, inner) + } yield Query.ForClause(v, prefix ~ filter ~ Jq.Child, inner) + case Jq.Iterator(filter, inner) => + for { + v <- nextIdent + inner <- preprocess(Jq.Child, inner) + } yield Query.ForClause(v, prefix ~ filter, inner) + case filter: Filter => + pure(Query.Ordpath(prefix ~ filter)) + } + + def compile(jq: Jq): F[CompiledJq[F]] = + for { + query <- preprocess(Jq.Root, jq).runA(0) + mft = compile(query) + esp <- mft.esp + } yield new ESPCompiledJq[F](esp) + +} diff --git a/json/src/main/scala/fs2/data/json/jq/internal/package.scala b/json/src/main/scala/fs2/data/json/jq/internal/package.scala new file mode 100644 index 000000000..2c5686d8f --- /dev/null +++ b/json/src/main/scala/fs2/data/json/jq/internal/package.scala @@ -0,0 +1,117 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package json +package jq + +import cats.data.NonEmptyList +import cats.syntax.all._ + +import tagged.TaggedJson +import esp.{Conversion, ESP, Tag} +import pattern.{ConstructorTree, Evaluator, Selectable} + +package object internal { + + private[internal] type JqESP[F[_]] = ESP[F, NonEmptyList[GuardTaggedMatcher], TaggedJson, TaggedJson] + + private[internal] implicit object selected extends Selectable[TaggedJson, Tag[TaggedJson]] { + + override def tree(e: TaggedJson): ConstructorTree[Tag[TaggedJson]] = + e match { + case TaggedJson.EndObjectValue => + ConstructorTree(Tag.Close, List(ConstructorTree.noArgConstructor(Tag.Name(TaggedJson.EndObjectValue)))) + case TaggedJson.StartArrayElement(idx) => + ConstructorTree(Tag.Open, List(ConstructorTree.noArgConstructor(Tag.Name(TaggedJson.StartArrayElement(idx))))) + case TaggedJson.StartObjectValue(key) => + ConstructorTree(Tag.Open, List(ConstructorTree.noArgConstructor(Tag.Name(TaggedJson.StartObjectValue(key))))) + case TaggedJson.EndArrayElement => + ConstructorTree(Tag.Close, List(ConstructorTree.noArgConstructor(Tag.Name(TaggedJson.EndArrayElement)))) + case TaggedJson.Raw(token) => + token match { + case Token.Key(_) => + throw new Exception("this case should never occur, this is a bug") + case Token.StartArray | Token.StartObject => + ConstructorTree(Tag.Open, List(ConstructorTree.noArgConstructor(Tag.Name(e)))) + case Token.EndArray | Token.EndObject => + ConstructorTree(Tag.Close, List(ConstructorTree.noArgConstructor(Tag.Name(e)))) + case _ => + ConstructorTree(Tag.Leaf, List(ConstructorTree.noArgConstructor(Tag.Value(e)))) + } + case TaggedJson.StartJson => + ConstructorTree(Tag.Open, List(ConstructorTree.noArgConstructor(Tag.Name(e)))) + case TaggedJson.EndJson => + ConstructorTree(Tag.Close, List(ConstructorTree.noArgConstructor(Tag.Name(e)))) + } + + } + + private[internal] implicit object conversion extends Conversion[TaggedJson, TaggedJson] { + + override def 
makeOpen(t: TaggedJson): TaggedJson = t + + override def makeClose(t: TaggedJson): TaggedJson = + t match { + case TaggedJson.StartArrayElement(_) => TaggedJson.EndArrayElement + case TaggedJson.StartObjectValue(_) => TaggedJson.EndObjectValue + case TaggedJson.Raw(Token.StartArray) => TaggedJson.Raw(Token.EndArray) + case TaggedJson.Raw(Token.StartObject) => TaggedJson.Raw(Token.EndObject) + case TaggedJson.StartJson => TaggedJson.EndJson + case _ => t + } + + override def makeLeaf(t: TaggedJson): TaggedJson = t + + } + + private[internal] implicit object evaluator extends Evaluator[NonEmptyList[GuardTaggedMatcher], Tag[TaggedJson]] { + + private def eval(guard: GuardTaggedMatcher, tree: ConstructorTree[Tag[TaggedJson]]): Boolean = { + (guard, tree) match { + case (TaggedMatcher.Slice(start, end), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.StartArrayElement(idx)), Nil)))) => + idx >= start && end.forall(idx < _) + case (TaggedMatcher.Slice(_, _), _) => + false + case (TaggedMatcher.Not(TaggedMatcher.StartObject), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.Raw(Token.StartObject)), Nil)))) => + false + case (TaggedMatcher.Not(TaggedMatcher.StartArray), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.Raw(Token.StartArray)), Nil)))) => + false + case (TaggedMatcher.Not(TaggedMatcher.Index(idx1)), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.StartArrayElement(idx2)), Nil)))) => + idx1 =!= idx2 + case (TaggedMatcher.Not(TaggedMatcher.Slice(start, end)), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.StartArrayElement(idx)), Nil)))) => + idx < start || end.exists(idx >= _) + case (TaggedMatcher.Not(TaggedMatcher.Field(name1)), + ConstructorTree(Tag.Open, List(ConstructorTree(Tag.Name(TaggedJson.StartObjectValue(name2)), Nil)))) => + name1 =!= name2 + case (TaggedMatcher.Not(_), _) => + true + } + } + + override def eval(guard: 
NonEmptyList[GuardTaggedMatcher], + tree: ConstructorTree[Tag[TaggedJson]]): Option[Tag[TaggedJson]] = + guard.forall(eval(_, tree)).guard[Option].as(Tag.Open) + + } + +} diff --git a/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonQueryPipe.scala b/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonQueryPipe.scala index 07a2146b4..d0254d08c 100644 --- a/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonQueryPipe.scala +++ b/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonQueryPipe.scala @@ -20,13 +20,15 @@ package json package jsonpath package internals +import tagged._ import pfsa._ +import cats.Eq import cats.effect.Concurrent import cats.syntax.all._ -private[jsonpath] sealed trait PathMatcher -private[jsonpath] object PathMatcher { +private[json] sealed trait PathMatcher +private[json] object PathMatcher { case object True extends PathMatcher case object False extends PathMatcher case object AnyKey extends PathMatcher @@ -38,25 +40,30 @@ private[jsonpath] object PathMatcher { def intersection(that: Range): Range = Range(Math.max(this.low, that.low), Math.min(this.high, that.high)) + def union(that: Range): Range = + Range(math.min(this.low, that.low), math.max(this.high, that.high)) + } case class And(left: PathMatcher, right: PathMatcher) extends PathMatcher case class Or(left: PathMatcher, right: PathMatcher) extends PathMatcher case class Not(inner: PathMatcher) extends PathMatcher + implicit val eq: Eq[PathMatcher] = Eq.fromUniversalEquals + implicit val PathMatcherPred: Pred[PathMatcher, TaggedJson] = new Pred[PathMatcher, TaggedJson] { - override def satsifies(p: PathMatcher)(e: TaggedJson): Boolean = + override def satisfies(p: PathMatcher)(e: TaggedJson): Boolean = (p, e) match { case (True, _) => true case (Range(low, high), TaggedJson.StartArrayElement(idx)) => idx >= low && idx <= high case (AnyKey, TaggedJson.StartObjectValue(_)) => true case (Key(n1), TaggedJson.StartObjectValue(n2)) => n1 === n2 - case (And(l, r), 
_) => satsifies(l)(e) && satsifies(r)(e) - case (Or(l, r), _) => satsifies(l)(e) || satsifies(r)(e) - case (Not(i), _) => !satsifies(i)(e) + case (And(l, r), _) => satisfies(l)(e) && satisfies(r)(e) + case (Or(l, r), _) => satisfies(l)(e) || satisfies(r)(e) + case (Not(i), _) => !satisfies(i)(e) case (_, _) => false } @@ -89,13 +96,14 @@ private[jsonpath] object PathMatcher { override def or(p1: PathMatcher, p2: PathMatcher): PathMatcher = (p1, p2) match { - case (True, _) => True - case (_, True) => True - case (False, _) => p2 - case (_, False) => p1 - case (Key(_) | AnyKey, AnyKey) => AnyKey - case (AnyKey, Key(_) | AnyKey) => AnyKey - case (_, _) => Or(p1, p2) + case (True, _) => True + case (_, True) => True + case (False, _) => p2 + case (_, False) => p1 + case (r1 @ Range(_, _), r2 @ Range(_, _)) if r1.intersects(r2) => r1.union(r2) + case (Key(_) | AnyKey, AnyKey) => AnyKey + case (AnyKey, Key(_) | AnyKey) => AnyKey + case (_, _) => Or(p1, p2) } override def not(p: PathMatcher): PathMatcher = diff --git a/json/src/main/scala/fs2/data/json/jsonpath/package.scala b/json/src/main/scala/fs2/data/json/jsonpath/package.scala index 1773a7a20..4ab25c845 100644 --- a/json/src/main/scala/fs2/data/json/jsonpath/package.scala +++ b/json/src/main/scala/fs2/data/json/jsonpath/package.scala @@ -24,6 +24,7 @@ import cats.syntax.all._ import scala.annotation.nowarn import ast.Builder +import tagged._ import jsonpath.internals._ import pfsa.{PDFA, PNFA} @@ -121,15 +122,6 @@ package object jsonpath { } - private def untag(tj: TaggedJson): Option[Token] = - tj match { - case TaggedJson.Raw(t) => Some(t) - case TaggedJson.StartArrayElement(_) => None - case TaggedJson.EndArrayElement => None - case TaggedJson.StartObjectValue(name) => Some(Token.Key(name)) - case TaggedJson.EndObjectValue => None - } - private def compileJsonPath(path: JsonPath): PDFA[PathMatcher, TaggedJson] = { def makeKey(p: Property): PathMatcher = diff --git 
a/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonTagger.scala b/json/src/main/scala/fs2/data/json/tagged/JsonTagger.scala similarity index 84% rename from json/src/main/scala/fs2/data/json/jsonpath/internals/JsonTagger.scala rename to json/src/main/scala/fs2/data/json/tagged/JsonTagger.scala index bda06be50..099f9edcc 100644 --- a/json/src/main/scala/fs2/data/json/jsonpath/internals/JsonTagger.scala +++ b/json/src/main/scala/fs2/data/json/tagged/JsonTagger.scala @@ -17,25 +17,42 @@ package fs2 package data package json -package jsonpath -package internals +package tagged import scala.collection.mutable.ListBuffer +import cats.Show +import cats.syntax.all._ -sealed trait TaggedJson -object TaggedJson { +private[json] sealed trait TaggedJson +private[json] object TaggedJson { + case object StartJson extends TaggedJson + case object EndJson extends TaggedJson case class Raw(token: Token) extends TaggedJson case class StartArrayElement(idx: Int) extends TaggedJson case object EndArrayElement extends TaggedJson case class StartObjectValue(name: String) extends TaggedJson case object EndObjectValue extends TaggedJson + implicit object show extends Show[TaggedJson] { + + override def show(t: TaggedJson): String = + t match { + case Raw(token) => token.jsonRepr + case StartArrayElement(idx) => show".[$idx]" + case EndArrayElement => ".[/]" + case StartObjectValue(name) => show".{$name}" + case EndObjectValue => ".{/}" + case _ => "" + } + + } + } /** Tags json tokens in an xml like fashion, with explicit open and close tags for points * of interest. This allows for implementing interesting queries with a simple tree automaton. 
*/ -object JsonTagger { +private[json] object JsonTagger { def pipe[F[_]: RaiseThrowable]: Pipe[F, Token, TaggedJson] = { def object_(chunk: Chunk[Token], idx: Int, rest: Stream[F, Token], chunkAcc: ListBuffer[TaggedJson]) @@ -118,8 +135,8 @@ object JsonTagger { Pull.done } } else { - value_(chunk, idx, rest, chunkAcc).flatMap { case (chunk, idx, rest, chunkAcc) => - go_(chunk, idx, rest, chunkAcc) + value_(chunk, idx, rest, chunkAcc += TaggedJson.StartJson).flatMap { case (chunk, idx, rest, chunkAcc) => + go_(chunk, idx, rest, chunkAcc += TaggedJson.EndJson) } } diff --git a/json/src/main/scala/fs2/data/json/tagged/package.scala b/json/src/main/scala/fs2/data/json/tagged/package.scala new file mode 100644 index 000000000..fcc9315ab --- /dev/null +++ b/json/src/main/scala/fs2/data/json/tagged/package.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2.data.json + +package object tagged { + + private[json] def untag(tj: TaggedJson): Option[Token] = + tj match { + case TaggedJson.Raw(t) => Some(t) + case TaggedJson.StartArrayElement(_) => None + case TaggedJson.EndArrayElement => None + case TaggedJson.StartObjectValue(name) => Some(Token.Key(name)) + case TaggedJson.EndObjectValue => None + case TaggedJson.StartJson => None + case TaggedJson.EndJson => None + } + +} diff --git a/json/src/test/scala/fs2/data/json/jq/JqSpec.scala b/json/src/test/scala/fs2/data/json/jq/JqSpec.scala new file mode 100644 index 000000000..013bb77ca --- /dev/null +++ b/json/src/test/scala/fs2/data/json/jq/JqSpec.scala @@ -0,0 +1,358 @@ +/* + * Copyright 2023 Lucas Satabin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package json +package jq + +import cats.effect._ +import weaver._ + +import json.literals._ +import literals._ + +object JqSpec extends SimpleIOSuite { + + val input: Stream[IO, Token] = json"""{ + "a": [ + {"b" : 0}, + {"b" : 1}, + {"b" : 2} + ] + }""".lift[IO] + + val compiler = Compiler[IO] + + test("select simple path") { + for { + compiled <- compiler.compile(jq".a[0].b") + result <- input.through(compiled).compile.toList + } yield expect.same(List(Token.NumberValue("0")), result) + } + + test("simple recursive descent") { + for { + compiled <- compiler.compile(jq"..") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("a"), + Token.StartArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.EndArray, + Token.EndObject, + Token.StartArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.EndArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.NumberValue("0"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.NumberValue("1"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.NumberValue("2") + ), + result + ) + } + + test("prefixed recursive descent") { + for { + compiled <- compiler.compile(jq".a | ..") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + 
Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.EndArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.NumberValue("0"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.NumberValue("1"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.NumberValue("2") + ), + result + ) + } + + test("identity") { + for { + compiled <- compiler.compile(jq".") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("a"), + Token.StartArray, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.EndArray, + Token.EndObject + ), + result + ) + } + + test("identity noop") { + for { + compiled <- compiler.compile(jq".a | . 
| .[2]") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject + ), + result + ) + } + + test("iterator") { + for { + compiled <- compiler.compile(jq".a[]") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject + ), + result + ) + } + + test("object iterator") { + for { + compiled <- compiler.compile(jq""".a | {"before": true, "value": .[].b, "after": .[0].b}""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("before"), + Token.TrueValue, + Token.Key("value"), + Token.NumberValue("0"), + Token.Key("after"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("before"), + Token.TrueValue, + Token.Key("value"), + Token.NumberValue("1"), + Token.Key("after"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("before"), + Token.TrueValue, + Token.Key("value"), + Token.NumberValue("2"), + Token.Key("after"), + Token.NumberValue("0"), + Token.EndObject + ), + result + ) + } + + test("array iterator") { + for { + compiled <- compiler.compile(jq"""[ "before", .a[], "after" ]""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartArray, + Token.StringValue("before"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.StringValue("after"), + Token.EndArray + ), + result + ) + } + + test("array iterator with constructor") { + 
for { + compiled <- compiler.compile(jq"""[ "before", .a[] | { "value": .b }, "after" ]""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartArray, + Token.StringValue("before"), + Token.StartObject, + Token.Key("value"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NumberValue("2"), + Token.EndObject, + Token.StringValue("after"), + Token.EndArray + ), + result + ) + } + + test("object iterator with constructor") { + for { + compiled <- compiler.compile(jq"""[ true, .a[].b | {"value": . }, false ]""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartArray, + Token.TrueValue, + Token.StartObject, + Token.Key("value"), + Token.NumberValue("0"), + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NumberValue("1"), + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NumberValue("2"), + Token.EndObject, + Token.FalseValue, + Token.EndArray + ), + result + ) + } + + test("object iterator with constructor iterator") { + for { + compiled <- compiler.compile(jq""".a[] | { "value": . 
}""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("value"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("0"), + Token.EndObject, + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("1"), + Token.EndObject, + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.StartObject, + Token.Key("b"), + Token.NumberValue("2"), + Token.EndObject, + Token.EndObject + ), + result + ) + } + +} diff --git a/site/documentation/json/directory.conf b/site/documentation/json/directory.conf index 83f49ddbf..aba3e3778 100644 --- a/site/documentation/json/directory.conf +++ b/site/documentation/json/directory.conf @@ -1,7 +1,8 @@ laika.navigationOrder = [ index.md, jsonpath.md, + jq.md, libraries.md, jsonpatch.md, - interpolators.md + transformations.md ] diff --git a/site/documentation/json/index.md b/site/documentation/json/index.md index 9002481d5..7ae3e00ba 100644 --- a/site/documentation/json/index.md +++ b/site/documentation/json/index.md @@ -31,74 +31,6 @@ stream.compile.toList The pipe validates the JSON structure while parsing. It reads all the json values in the input stream and emits tokens as they are available. -## Selectors - -Selectors can be used to select a subset of a JSON token stream. There are several ways to create selectors: - - - build the selector using the constructors, which can be quite verbose and cumbersome; - - parse a string with the selector syntax; - - use the selector DSL. - -### Parsing a string using the selector syntax - -For instance, to select and enumerate elements that are in the `field3` array, you can create this selector. Only the tokens describing the values in `field3` will be emitted as a result. 
- -```scala mdoc - -type ThrowableEither[T] = Either[Throwable, T] - -val selector = ".field3.[]".parseSelector[ThrowableEither].toTry.get -``` - -The `parseSelector` method implicitly comes from the `import fs2.data.json._` and wraps the result in anything that has an [`MonadError` with error type `Throwable`][monad-error] to catch potential parsing errors. If you prefer not to have this wrapping and don't mind an extra dependency, you can have a look at [the interpolator][interpolator-doc]. - -The filter syntax is as follows: - - - `.` selects the root values, it is basically the identity filter. - - `.f` selects the field named `f` in objects. It fails if the value it is applied to is not a JSON object. - - `f` must be a valid Java identifier, meaning it has to respect this regular expression: `[a-zA-Z_][a-zA-Z0-9_]*`. If you wish to select a field that doesn't respect this regular expression, you can use the syntax `.["my non-identifier field"]` described below. - - name `f` can be immediately followed by a `!` to mark it as mandatory. Stream will fail if the end of the object the selector is applied to is reached and the field was not present in the object. - - `.f?` is similar to `.f` but doesn't fail in case the value it is applied to is not a JSON object. - - both `!` and `?` can be combined as `.f!?` to indicate that if the value it is applied to is a JSON object, then the field must be in it. - - `.["f1", "f2", ..., "fn"]` selects only fields `f1` to `fn` in objects. It fails if the value it is applied to is not an object. - - the field list can be immediately followed by a `!` to mark all fields as mandatory. Stream will fail if the end of the object the selector is applied to is reached and at least one field in the list was not present in the object. - - `.["f1", "f2", ..., "fn"]?` is similar to `.["f1", "f2", ..., "fn"]` but doesn't fail if the value it is applied to is not an object. 
- - both `!` and `?` can be combined as `.["f1", "f2", ..., "fn"]!?` to indicate that if the value it is applied to is a JSON object, then all the specified fields must be in it. - - `.[id1, idx2, ..., idxn]` selects only elements `idx1`, ..., `idxn` in arrays. It fails if the value it is applied to is not an array. - - `.[idx1, idx2, ..., idxn]?` is similar to `.[idx1, idx2, ..., idxn]` but doesn't fail if the value it is applied to is not an array. - - `.[idx1:idx2]` selects only elements between `idx1` (inclusive) and `idx2` (exclusive) in arrays. It fails if the value it is applied to is not an array. - - `.[idx1:idx2]?` is similar to `.[idx1:idx2]` but doesn't fail if the value it is applied to is not an array. - - `.[]` selects and enumerates elements from arrays or objects. It fails if the value it is applied to is not an array or an object. - - `.[]?` is similar as `.[]` but doesn't fail if the value it is applied to is neither an array nor an object. - - `sel1 sel2` applies selector `sel1` to the root value, and selector `sel2` to each selected value. - -### Using the selector DSL - -The selector DSL is a nice way to describe selectors without using any string parsing. They also allow for programmatically building selectors. -The DSL resides within the `fs2.data.json.selector` package, and you start a selector using the `root` builder. -The selector above can be written like this with the DSL: - -```scala mdoc -import fs2.data.json.selector._ - -val selectorFromDsl = root.field("field3").iterate.compile -``` - -The `.compile` in the end transforms the previous selector builder from the DSL into the final selector. Builders are safe to reuse, re-compose and compile several times. - -You can express the same selectors as with the syntax described above. 
For instance to make the field mandatory and the iteration lenient you can do:
-
-```scala mdoc:nest
-val selectorFromDsl = root.field("field3").!.iterate.?.compile
-```
-
-The DSL is typesafe, so that you cannot write invalid selectors. Any attempt to do so results in a compilation error.
-
-```scala mdoc:fail
-// array index selection cannot be made mandatory
-root.index(1).!
-```
-
 ## AST builder and tokenizer
 
 To handle Json ASTs, you can use the types and pipes available in the `fs2.data.json.ast` package.
@@ -137,23 +69,19 @@ stream.through(values[Fallible, SomeJsonType])
 
 ### From values to `Token`s
 
-If you provide an implicit @:api(fs2.data.json.ast.Tokenizer), which describes how a JSON AST is transformed into JSON tokens, you can apply transformations to the JSON stream. For instance, you can apply a function `fun` to all values in the `fields3` array by using this code:
+JSON tokens can be built from an existing value, provided you have an implicit @:api(fs2.data.json.ast.Tokenizer), using the `tokenize` pipe.
 
 ```scala mdoc:compile-only
 import ast._
 
 trait SomeJsonType
 
-implicit val builder: Builder[SomeJsonType] = ???
-implicit val tokenizer: Tokenizer[SomeJsonType] = ???
+val v: SomeJsonType = ???
 
-def fun(json: SomeJsonType): SomeJsonType = ???
+implicit val tokenizer: Tokenizer[SomeJsonType] = ???
 
-stream.through(transform[Fallible, SomeJsonType](selector, fun))
+Stream.emit(v).through(tokenize[Fallible, SomeJsonType])
 ```
 
-For concrete examples of provided `Builder`s and `Tokenizer`s, please refer to [the JSON library binding modules documentation][json-lib-doc].
-
-Sometimes you would like to delete some Json values from the input stream, based o some predicate at a given path, and keep the rest untouched. In this case, you can use the `transformOpt` pipe, and return `None` for values you want to remove from the stream.
## Serializers and deserializers @@ -276,4 +204,3 @@ For more pipes and options, please refer to the @:api(fs2.data.json.package$$wra [json-lib-doc]: /documentation/json/libraries.md [interpolator-doc]: /documentation/json/interpolators.md -[monad-error]: https://typelevel.org/cats/api/cats/MonadError.html diff --git a/site/documentation/json/interpolators.md b/site/documentation/json/interpolators.md deleted file mode 100644 index ae4bf72f3..000000000 --- a/site/documentation/json/interpolators.md +++ /dev/null @@ -1,18 +0,0 @@ -# Interpolators - -Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-interpolators_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-interpolators_2.13) - -The `fs2-data-json-interpolators` module provides users with some useful string interpolators. The interpolators are based on [literally][literally] and are statically checked. - -## Selector interpolator - -You can use the `selector` interpolator to parse a literal string. - -The example above can be rewritten as: -```scala mdoc -import fs2.data.json.interpolators._ - -val selector = selector".field3.[]" -``` - -[literally]: https://github.com/typelevel/literally diff --git a/site/documentation/json/jq.md b/site/documentation/json/jq.md new file mode 100644 index 000000000..5f7ccfef0 --- /dev/null +++ b/site/documentation/json/jq.md @@ -0,0 +1,143 @@ +# JSON Queries + +Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json_2.13) + +The `fs2-data-json` module provides a streaming implementation of a [jq][jq]-like query language. + +This allows for extracting and transforming JSON data in a streaming and declarative fashion. +It can be useful when you want to extract and transform only a part of an input JSON data. + +@:callout(info) +The JSON query feature is still **experimental**. 
It should be stable enough to be used but you can come across some bugs when using complex queries. In such a case, do not hesitate to reach out on Discord or GitHub (see link at the top of the page). +@:@ + +Let's use the following JSON input as an example. + +```scala mdoc +import cats.effect.SyncIO +import cats.syntax.all._ + +import fs2._ +import fs2.data.json._ + +val input = """{ + | "field1": 0, + | "field2": "test", + | "field3": [1, 2, 3] + |}""".stripMargin + +val stream = Stream.emit(input).through(tokens[SyncIO, String]) +``` + +## Building a query + +There are several ways to create queries: + + - build the query using the constructors, which can be quite verbose and cumbersome; + - parse a string with the jq parser; + - use the `jq` interpolator. + +### Parsing a string using the jq parser + +For instance, to create an output array containing one element per element in `field3`, elements being objects with `field2` and the current value from `field3` we can write: + +```scala mdoc +import fs2.data.json.jq._ + +val wrappedQuery = JqParser.either("""[ { "field2": .field2, "field3": .field3[] } ]""") +``` + +The jq parser wraps the result in anything that has an [`MonadError` with error type `Throwable`][monad-error] to catch potential parsing errors. If you prefer not to have this wrapping, you can use the `jq` interpolator. + +```scala mdoc +import fs2.data.json.jq.literals._ + +val query = jq"""[ { "field2": .field2, "field3": .field3[] } ]""" +``` + +## The query language + +The general form of a query is a two phases process: + + - A potential _filtering_ phase, which selects some elements from the input stream. If the selector is not provided, then the input value is selected as a whole. + - A potential _construction_ phase, which builds the output based on the elements selected by the selection phase. If the constructor is not provided, then the selected elements are emitted unchanged. 
+
+The constructor can contain sub-queries applied to each selected element.
+
+So a query is of the form:
+
+ - `filter` to just apply a selection to input (think [JSONPath][jsonpath]).
+ - `filter | constructor` to select only a subset of the input and build the result out of the selected elements.
+ - `constructor` to build the result from the input top-value.
+
+### The filters
+
+The following filters exist:
+
+ - `.` is the identity filter, that selects the current value.
+ - `.field` or `.["field"]` is the field filter, that selects the specified field name on the current value if it is an object. If the current value is not an object, then nothing is selected.
+ - `.[idx]` is the array element filter, that selects the value at the specified index in the current value if it is an array. If the current value is not an array, then nothing is selected.
+ - `.[idx1:idx2]`, `.[idx:]`, `.[:idx]` is the array slice filter, that selects only elements within the provided lower bound (inclusive) and upper bound (exclusive) on the current value if it is an array. If the lower bound is not specified, then the slice is from the beginning of the array, up to the upper bound (exclusive). If the upper bound is not specified, then the slice starts from the lower bound (inclusive), up to the end of the array. Negative indices are not supported. If the current value is not an array, then nothing is selected.
+ - `..` is the recursive descent filter.
+ - `.[]` is the iterator filter, that selects every element on the current value if it is an object or an array. If the input is neither an object nor an array, then nothing is selected.
+
+Filters can be sequenced by using the pipe (`|`) symbol as separator, for instance to select the field `a` and then only the third element in `a` if it is an array, the filter is `.a | .[2]`.
+The pipe separator can be elided in many cases, and the leading dot that would follow it is then removed.
For instance, the previous example can also be written `.a[2]`.
+
+@:callout(warning)
+The recursive descent operator must be preceded by a pipe if it is not the first operator.
+@:@
+
+### The constructors
+
+#### Values
+
+Any JSON scalar value is a valid query constructor, it means:
+
+ - `null`
+ - `"some string"`
+ - `12`, `0.1`
+ - `true`, `false`
+
+build the equivalent JSON value. The scalar values do not depend on the selected values from the input.
+
+#### Objects
+
+It is possible to build a JSON object, whose field values may depend on the selected elements from the filter phase, by using the following syntax: `{ "a": query1, "b": query2, ... }`.
+
+Each object value is a full query. If a query emits several elements (e.g. is an iterator), then one object is emitted to the output per element the iterator filter selects.
+
+@:callout(warning)
+The object constructor can only contain one top-level iterator query. For instance, trying to compile the following query will **fail**:
+```
+{ "a": .a[], "b": .b[] }
+```
+@:@
+
+#### Arrays
+
+It is possible to build a JSON array, whose element values may depend on the selected elements from the filter phase, by using the following syntax: `[ query1, query2, ... ]`.
+
+Each value is a full query. If a query emits several elements (e.g. is an iterator), then all resulting elements are emitted as array elements, in the order they are selected.
+
+## Using queries
+
+A query must first be compiled to be usable. Compiling a query can be a quite expensive computation, but a compiled query can be reused any number of times, so you usually will compile it only once.
+
+To use a query, make your stream pass through the compiled query obtained above. A compiled query is a `Pipe[F, Token, Token]`.
+ +```scala mdoc +val qCompiler = jq.Compiler[SyncIO] + +val compiled = qCompiler.compile(query).unsafeRunSync() + +stream + .through(compiled) + .compile + .to(collector.pretty()) + .unsafeRunSync() +``` + +[jq]: https://jqlang.github.io/jq/ +[jsonpath]: jsonpath.md +[monad-error]: https://typelevel.org/cats/api/cats/MonadError.html diff --git a/site/documentation/json/transformations.md b/site/documentation/json/transformations.md new file mode 100644 index 000000000..4404edc3f --- /dev/null +++ b/site/documentation/json/transformations.md @@ -0,0 +1,117 @@ +# Transformations + +Sometimes [JSONPath][jsonpath] and [JSON queries][jq] are not sufficient for your use case. In these cases, you can use the transformation pipes provided by the @:api(fs2.data.json.ast.transform.package) package. + +## Selectors + +Selectors can be used to select a subset of a JSON token stream. There are several ways to create selectors: + + - build the selector using the constructors, which can be quite verbose and cumbersome; + - parse a string with the selector syntax; + - use the selector DSL. + +### Parsing a string using the selector syntax + +For instance, to select and enumerate elements that are in the `field3` array, you can create this selector. Only the tokens describing the values in `field3` will be emitted as a result. + +```scala mdoc +import fs2._ +import fs2.data.json._ + +type ThrowableEither[T] = Either[Throwable, T] + +val selector = ".field3.[]".parseSelector[ThrowableEither].toTry.get +``` + +The `parseSelector` method implicitly comes from the `import fs2.data.json._` and wraps the result in anything that has an [`MonadError` with error type `Throwable`][monad-error] to catch potential parsing errors. If you prefer not to have this wrapping and don't mind an extra dependency, you can have a look at [the interpolator][Parsing a string using the selector interpolator]. 
+ +The filter syntax is as follows: + + - `.` selects the root values, it is basically the identity filter. + - `.f` selects the field named `f` in objects. It fails if the value it is applied to is not a JSON object. + - `f` must be a valid Java identifier, meaning it has to respect this regular expression: `[a-zA-Z_][a-zA-Z0-9_]*`. If you wish to select a field that doesn't respect this regular expression, you can use the syntax `.["my non-identifier field"]` described below. + - name `f` can be immediately followed by a `!` to mark it as mandatory. Stream will fail if the end of the object the selector is applied to is reached and the field was not present in the object. + - `.f?` is similar to `.f` but doesn't fail in case the value it is applied to is not a JSON object. + - both `!` and `?` can be combined as `.f!?` to indicate that if the value it is applied to is a JSON object, then the field must be in it. + - `.["f1", "f2", ..., "fn"]` selects only fields `f1` to `fn` in objects. It fails if the value it is applied to is not an object. + - the field list can be immediately followed by a `!` to mark all fields as mandatory. Stream will fail if the end of the object the selector is applied to is reached and at least one field in the list was not present in the object. + - `.["f1", "f2", ..., "fn"]?` is similar to `.["f1", "f2", ..., "fn"]` but doesn't fail if the value it is applied to is not an object. + - both `!` and `?` can be combined as `.["f1", "f2", ..., "fn"]!?` to indicate that if the value it is applied to is a JSON object, then all the specified fields must be in it. + - `.[id1, idx2, ..., idxn]` selects only elements `idx1`, ..., `idxn` in arrays. It fails if the value it is applied to is not an array. + - `.[idx1, idx2, ..., idxn]?` is similar to `.[idx1, idx2, ..., idxn]` but doesn't fail if the value it is applied to is not an array. + - `.[idx1:idx2]` selects only elements between `idx1` (inclusive) and `idx2` (exclusive) in arrays. 
It fails if the value it is applied to is not an array. + - `.[idx1:idx2]?` is similar to `.[idx1:idx2]` but doesn't fail if the value it is applied to is not an array. + - `.[]` selects and enumerates elements from arrays or objects. It fails if the value it is applied to is not an array or an object. + - `.[]?` is similar as `.[]` but doesn't fail if the value it is applied to is neither an array nor an object. + - `sel1 sel2` applies selector `sel1` to the root value, and selector `sel2` to each selected value. + +### Using the selector DSL + +The selector DSL is a nice way to describe selectors without using any string parsing. They also allow for programmatically building selectors. +The DSL resides within the `fs2.data.json.selector` package, and you start a selector using the `root` builder. +The selector above can be written like this with the DSL: + +```scala mdoc +import fs2.data.json.selector._ + +val selectorFromDsl = root.field("field3").iterate.compile +``` + +The `.compile` in the end transforms the previous selector builder from the DSL into the final selector. Builders are safe to reuse, re-compose and compile several times. + +You can express the same selectors as with the syntax described above. For instance to make the field mandatory and the iteration lenient you can do: + +```scala mdoc:nest +val selectorFromDsl = root.field("field3").!.iterate.?.compile +``` + +The DSL is typesafe, so that you cannot write invalid selectors. Any attempt to do so results in a compilation error. + +```scala mdoc:fail +// array index selection cannot be made mandatory +root.index(1).! +``` + +### Parsing a string using the selector interpolator + +Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-interpolators_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-interpolators_2.13) + +The `fs2-data-json-interpolators` module provides users with some useful string interpolators. 
The interpolators are based on [literally][literally] and are statically checked.
+
+You can use the `selector` interpolator to parse a literal string.
+
+The example above can be rewritten as:
+```scala mdoc
+import fs2.data.json.interpolators._
+
+val selector = selector".field3.[]"
+```
+
+## Using the selectors
+
+All the pipes in this package are based on a selector, a @:api(fs2.data.json.ast.Builder), and a @:api(fs2.data.json.ast.Tokenizer).
+
+If you provide an implicit @:api(fs2.data.json.ast.Tokenizer), which describes how a JSON AST is transformed into JSON tokens, you can apply transformations to the JSON stream. For instance, you can apply a function `fun` to all values in the `field3` array by using this code:
+
+```scala mdoc:compile-only
+import ast._
+
+trait SomeJsonType
+
+implicit val builder: Builder[SomeJsonType] = ???
+implicit val tokenizer: Tokenizer[SomeJsonType] = ???
+
+def fun(json: SomeJsonType): SomeJsonType = ???
+
+val stream: Stream[Fallible, Token] = ???
+
+stream.through(transform[Fallible, SomeJsonType](selector, fun))
+```
+For concrete examples of provided `Builder`s and `Tokenizer`s, please refer to [the JSON library binding modules documentation][json-lib-doc].
+
+Sometimes you would like to delete some JSON values from the input stream, based on some predicate at a given path, and keep the rest untouched. In this case, you can use the `transformOpt` pipe, and return `None` for values you want to remove from the stream.
+ +[literally]: https://github.com/typelevel/literally +[jsonpath]: jsonpath.md +[jq]: jq.md +[monad-error]: https://typelevel.org/cats/api/cats/MonadError.html diff --git a/xml/src/main/scala/fs2/data/xml/xpath/internals/LocationMatch.scala b/xml/src/main/scala/fs2/data/xml/xpath/internals/LocationMatch.scala index e2b903292..47c727f7b 100644 --- a/xml/src/main/scala/fs2/data/xml/xpath/internals/LocationMatch.scala +++ b/xml/src/main/scala/fs2/data/xml/xpath/internals/LocationMatch.scala @@ -42,7 +42,7 @@ object LocationMatch { implicit val LocationMatchPred: Pred[LocationMatch, StartElement] = new Pred[LocationMatch, StartElement] { - override def satsifies(p: LocationMatch)(e: StartElement): Boolean = + override def satisfies(p: LocationMatch)(e: StartElement): Boolean = p match { case True => true case False => false @@ -50,9 +50,9 @@ object LocationMatch { case AttrExists(attr) => e.attributes.contains(attr) case AttrEq(attr, value) => e.attributes.get(attr).contains(value) case AttrNeq(attr, value) => e.attributes.get(attr).fold(false)(_ =!= value) - case And(left, right) => satsifies(left)(e) && satsifies(right)(e) - case Or(left, right) => satsifies(left)(e) || satsifies(right)(e) - case Not(inner) => !satsifies(inner)(e) + case And(left, right) => satisfies(left)(e) && satisfies(right)(e) + case Or(left, right) => satisfies(left)(e) || satisfies(right)(e) + case Not(inner) => !satisfies(inner)(e) } override def always: LocationMatch = True