From a9ab63d4b6f2f51f5a256d2fcb8124c032a4ca94 Mon Sep 17 00:00:00 2001
From: Grisha Pomadchin
Date: Thu, 1 Jun 2017 01:45:01 +0300
Subject: [PATCH] Move to com.networknt Json Schema validator

Signed-off-by: Grisha Pomadchin
---
 project/Dependencies.scala                    |   2 +
 spark-etl/build.sbt                           |   2 +-
 .../src/main/resources/input-schema.json      | 127 +++++++++---------
 .../spark/etl/config/BaseEtlConf.scala        |  45 ++++---
 .../spark/etl/config/ConfigParse.scala        |   8 +-
 5 files changed, 99 insertions(+), 85 deletions(-)

diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 352cd9417f..32dd8cd1b8 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -48,4 +48,6 @@ object Dependencies {
 
   val slickPG           = "com.github.tminglei" %% "slick-pg" % "0.14.6"
   val parserCombinators = "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.5"
+
+  val jsonSchemaValidator = "com.networknt" % "json-schema-validator" % "0.1.7"
 }
diff --git a/spark-etl/build.sbt b/spark-etl/build.sbt
index 22b76ce326..0f9a7d95b5 100644
--- a/spark-etl/build.sbt
+++ b/spark-etl/build.sbt
@@ -2,7 +2,7 @@ import Dependencies._
 
 name := "geotrellis-spark-etl"
 
 libraryDependencies ++= Seq(
-  "com.github.fge" % "json-schema-validator" % "2.2.6",
+  jsonSchemaValidator,
   sparkCore % "provided",
   scalatest % "test")
diff --git a/spark-etl/src/main/resources/input-schema.json b/spark-etl/src/main/resources/input-schema.json
index 05b18d26c9..c1eae64f54 100644
--- a/spark-etl/src/main/resources/input-schema.json
+++ b/spark-etl/src/main/resources/input-schema.json
@@ -1,71 +1,74 @@
 {
   "$schema": "http://json-schema.org/draft-04/schema#",
   "type": "array",
-  "properties": {
-    "format": {
-      "type": "string"
-    },
-    "name": {
-      "type": "string"
-    },
-    "cache": {
-      "type": "string"
-    },
-    "crs": {
-      "type": "string"
-    },
-    "noData": {
-      "type": "integer"
-    },
-    "maxTileSize": {
-      "type": "integer"
-    },
-    "numPartitions": {
-      "type": "integer"
-    },
-    "clip": {
-      "type": "object",
-      "properties": {
-        "xmin": {
-          "type": "number"
-        },
-        "ymin": {
-          "type": "number"
-        },
-        "xmax": {
-          "type": "number"
-        },
-        "ymax": {
-          "type": "number"
-        }
-      },
-      "required": [
-        "xmin",
-        "ymin",
-        "xmax",
-        "ymax"
-      ]
-    },
-    "backend": {
-      "type": "object",
-      "properties": {
-        "type": {
-          "type": "string"
-        },
-        "path": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "type",
-        "path"
-      ]
-    }
-  },
-  "required": [
-    "format",
-    "name",
-    "backend"
-  ],
+  "items": {
+    "type": "object",
+    "properties": {
+      "format": {
+        "type": "string"
+      },
+      "name": {
+        "type": "string"
+      },
+      "cache": {
+        "type": "string"
+      },
+      "crs": {
+        "type": "string"
+      },
+      "noData": {
+        "type": "integer"
+      },
+      "maxTileSize": {
+        "type": "integer"
+      },
+      "numPartitions": {
+        "type": "integer"
+      },
+      "clip": {
+        "type": "object",
+        "properties": {
+          "xmin": {
+            "type": "number"
+          },
+          "ymin": {
+            "type": "number"
+          },
+          "xmax": {
+            "type": "number"
+          },
+          "ymax": {
+            "type": "number"
+          }
+        },
+        "required": [
+          "xmin",
+          "ymin",
+          "xmax",
+          "ymax"
+        ]
+      },
+      "backend": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string"
+          },
+          "path": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "type",
+          "path"
+        ]
+      }
+    },
+    "required": [
+      "format",
+      "name",
+      "backend"
+    ]
+  },
   "additionalProperties": false
 }
diff --git a/spark-etl/src/main/scala/geotrellis/spark/etl/config/BaseEtlConf.scala b/spark-etl/src/main/scala/geotrellis/spark/etl/config/BaseEtlConf.scala
index df65a2daf1..076a620217 100644
--- a/spark-etl/src/main/scala/geotrellis/spark/etl/config/BaseEtlConf.scala
+++ b/spark-etl/src/main/scala/geotrellis/spark/etl/config/BaseEtlConf.scala
@@ -17,12 +17,17 @@ package geotrellis.spark.etl.config
 
 import geotrellis.spark.etl.config.json._
+import geotrellis.util.LazyLogging
 import org.apache.spark.SparkContext
-import com.github.fge.jackson.JsonLoader
 import spray.json._
 
-trait BaseEtlConf extends ConfigParse {
+import scala.collection.JavaConverters._
+
+trait BaseEtlConf extends ConfigParse with LazyLogging {
+  private def colorString(str: String, color: String = Console.RED) = s"${color}${str}${Console.RESET}"
+  private def loggerError(str: String, color: String = Console.RED) = logger.error(colorString(str, color))
+
   val help = """
              |geotrellis-etl
              |
              |Usage: geotrellis-etl [options]
              |
@@ -40,9 +45,9 @@
   val requiredFields = Set('input, 'output, 'backendProfiles)
 
-  val backendProfilesSchema = schemaFactory.getJsonSchema(JsonLoader.fromResource("/backend-profiles-schema.json"))
-  val inputSchema = schemaFactory.getJsonSchema(JsonLoader.fromResource("/input-schema.json"))
-  val outputSchema = schemaFactory.getJsonSchema(JsonLoader.fromResource("/output-schema.json"))
+  val backendProfilesSchema = schemaFactory.getSchema(getClass.getResourceAsStream("/backend-profiles-schema.json"))
+  val inputSchema = schemaFactory.getSchema(getClass.getResourceAsStream("/input-schema.json"))
+  val outputSchema = schemaFactory.getSchema(getClass.getResourceAsStream("/output-schema.json"))
 
   def nextOption(map: Map[Symbol, String], list: Seq[String]): Map[Symbol, String] =
     list.toList match {
       case Nil => map
@@ -58,7 +63,7 @@
         sys.exit(1)
       }
       case option :: tail => {
-        println(s"Unknown option ${option}")
+        println(colorString(s"Unknown option ${option}"))
         println(help)
         sys.exit(1)
       }
@@ -68,28 +73,28 @@
     val m = parse(args)
 
     if(m.keySet != requiredFields) {
-      println(s"missing required field(s): ${(requiredFields -- m.keySet).mkString(", ")}, use --help command to get additional information about input options.")
+      loggerError(s"missing required field(s): ${(requiredFields -- m.keySet).mkString(", ")}, use --help command to get additional information about input options.")
       sys.exit(1)
     }
 
     val(backendProfiles, input, output) = (m('backendProfiles), m('input), m('output))
 
-    val inputValidation = inputSchema.validate(JsonLoader.fromString(input), true)
-    val backendProfilesValidation = backendProfilesSchema.validate(JsonLoader.fromString(backendProfiles), true)
-    val outputValidation = outputSchema.validate(JsonLoader.fromString(output), true)
+    val inputValidation = inputSchema.validate(jsonNodeFromString(input))
+    val backendProfilesValidation = backendProfilesSchema.validate(jsonNodeFromString(backendProfiles))
+    val outputValidation = outputSchema.validate(jsonNodeFromString(output))
 
-    if(!inputValidation.isSuccess || !backendProfilesValidation.isSuccess || !outputValidation.isSuccess) {
-      if(!inputValidation.isSuccess) {
-        println("input validation error:")
-        println(inputValidation)
+    if(!inputValidation.isEmpty || !backendProfilesValidation.isEmpty || !outputValidation.isEmpty) {
+      if(!inputValidation.isEmpty) {
+        loggerError("input validation errors:")
+        inputValidation.asScala.foreach(msg => loggerError(s" - ${msg.getMessage}"))
       }
-      if(!backendProfilesValidation.isSuccess) {
-        println("backendProfiles validation error:")
-        println(backendProfilesValidation)
+      if(!backendProfilesValidation.isEmpty) {
+        loggerError("backendProfiles validation errors:")
+        backendProfilesValidation.asScala.foreach(msg => loggerError(s" - ${msg.getMessage}"))
loggerError(s" - ${msg.getMessage}")) } - if(!outputValidation.isSuccess) { - println("output validation error:") - println(outputValidation) + if(!outputValidation.isEmpty) { + loggerError(s"output validation error:") + outputValidation.asScala.foreach(msg => loggerError(s" - ${msg.getMessage}")) } sys.exit(1) } diff --git a/spark-etl/src/main/scala/geotrellis/spark/etl/config/ConfigParse.scala b/spark-etl/src/main/scala/geotrellis/spark/etl/config/ConfigParse.scala index 67b741525d..d2df7c867d 100644 --- a/spark-etl/src/main/scala/geotrellis/spark/etl/config/ConfigParse.scala +++ b/spark-etl/src/main/scala/geotrellis/spark/etl/config/ConfigParse.scala @@ -16,7 +16,9 @@ package geotrellis.spark.etl.config -import com.github.fge.jsonschema.main.JsonSchemaFactory +import com.networknt.schema.JsonSchemaFactory +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext @@ -25,7 +27,7 @@ trait ConfigParse { val help: String val requiredFields: Set[Symbol] - val schemaFactory = JsonSchemaFactory.byDefault() + val schemaFactory = new JsonSchemaFactory() def getJson(filePath: String, conf: Configuration): String = { val path = new Path(filePath) @@ -35,6 +37,8 @@ trait ConfigParse { is.close(); fs.close(); json } + def jsonNodeFromString(content: String): JsonNode = new ObjectMapper().readTree(content) + def nextOption(map: Map[Symbol, String], list: Seq[String]): Map[Symbol, String] def parse(args: Seq[String])(implicit sc: SparkContext) =