Skip to content

Commit

Permalink
Expose Charset in a ShapeFileReader API (#3464)
Browse files Browse the repository at this point in the history
* Expose Charset in a ShapeFileReader API

* Add Shapefile with UTF-8 chars in properties

* Update CHANGELOG.md

Co-authored-by: Grigory Pomadchin <[email protected]>
  • Loading branch information
vlulla and pomadchin authored May 12, 2022
1 parent c1aeac2 commit 04dc66c
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 50 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed
- Expose Charset in a ShapeFileReader API [#3464](https://github.com/locationtech/geotrellis/pull/3464)

## [3.6.2] - 2022-04-05

### Changed
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["WGS_84_World_Mercator",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Mercator"],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["Meter",1],PARAMETER["standard_parallel_1",0.0]]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["WGS 84 / World Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Mercator_1SP"],PARAMETER["central_meridian",0],PARAMETER["scale_factor",1],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","3395"]]
Binary file not shown.
Binary file not shown.
130 changes: 80 additions & 50 deletions shapefile/src/main/scala/geotrellis/shapefile/ShapeFileReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import org.geotools.data.shapefile._

import java.net.URL
import java.io.File
import java.nio.charset.Charset

import scala.collection.mutable
import scala.collection.JavaConverters._

object ShapeFileReader {
/** Charset used by every overload that does not take an explicit `charSet`: ISO-8859-1. */
val DEFAULT_CHARSET: Charset = java.nio.charset.StandardCharsets.ISO_8859_1
implicit class SimpleFeatureWrapper(ft: SimpleFeature) {
def geom[G <: Geometry: Manifest]: Option[G] =
ft.getAttribute(0) match {
Expand All @@ -45,11 +47,15 @@ object ShapeFileReader {
ft.getAttribute(name).asInstanceOf[D]
}

def readSimpleFeatures(path: String): Seq[SimpleFeature] = readSimpleFeatures(new URL(s"file://${new File(path).getAbsolutePath}"))
// TODO: use default argument instead of overloads in the next major release
/** Reads every feature of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readSimpleFeatures(path: String): Seq[SimpleFeature] =
  readSimpleFeatures(new File(path).toURI.toURL, DEFAULT_CHARSET)

/**
 * Reads every feature of the shapefile at `path`.
 *
 * The path is turned into a URL with `File.toURI.toURL` instead of concatenating
 * `"file://"` with the raw path: `toURI` resolves the path to its absolute form and
 * percent-encodes it, so paths containing spaces, `#` or `%`, as well as Windows
 * drive-letter paths, still yield a well-formed `file:` URL.
 *
 * @param path    file system path of the shapefile (relative or absolute)
 * @param charSet charset forwarded to the URL-based overload
 * @return the features returned by the URL-based overload
 */
def readSimpleFeatures(path: String, charSet: Charset): Seq[SimpleFeature] =
  readSimpleFeatures(new File(path).toURI.toURL, charSet)

def readSimpleFeatures(url: URL): Seq[SimpleFeature] = {
/** Reads every feature of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readSimpleFeatures(url: URL): Seq[SimpleFeature] =
  readSimpleFeatures(url, DEFAULT_CHARSET)
def readSimpleFeatures(url: URL, charSet: Charset): Seq[SimpleFeature] = {
// Extract the features as GeoTools 'SimpleFeatures'
val ds = new ShapefileDataStore(url)
ds.setCharset(charSet)
val ftItr: SimpleFeatureIterator = ds.getFeatureSource.getFeatures.features

try {
Expand All @@ -62,99 +68,123 @@ object ShapeFileReader {
}
}

def readPointFeatures(path: String): Seq[PointFeature[Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the Point features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readPointFeatures(path: String): Seq[PointFeature[Map[String,Object]]] =
  readPointFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[Point]` yields a geometry;
 * all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `PointFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readPointFeatures(path: String, charSet: Charset): Seq[PointFeature[Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    point   <- feature.geom[Point]
  } yield PointFeature(point, feature.attributeMap)

def readPointFeatures[D](path: String, dataField: String): Seq[PointFeature[D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readPointFeatures[D](path: String, dataField: String): Seq[PointFeature[D]] =
  readPointFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[Point]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `PointFeature` per kept feature, carrying the value of `dataField`
 */
def readPointFeatures[D](path: String, dataField: String, charSet: Charset): Seq[PointFeature[D]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    point   <- feature.geom[Point]
  } yield PointFeature(point, feature.attribute[D](dataField))

def readPointFeatures(url: URL): Seq[PointFeature[Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the Point features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readPointFeatures(url: URL): Seq[PointFeature[Map[String,Object]]] =
  readPointFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[Point]` yields a geometry;
 * all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `PointFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readPointFeatures(url: URL, charSet: Charset): Seq[PointFeature[Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    point   <- feature.geom[Point]
  } yield PointFeature(point, feature.attributeMap)

def readPointFeatures[D](url: URL, dataField: String): Seq[PointFeature[D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readPointFeatures[D](url: URL, dataField: String): Seq[PointFeature[D]] =
  readPointFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[Point]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `PointFeature` per kept feature, carrying the value of `dataField`
 */
def readPointFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[PointFeature[D]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    point   <- feature.geom[Point]
  } yield PointFeature(point, feature.attribute[D](dataField))

def readLineFeatures(path: String): Seq[Feature[LineString, Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the LineString features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readLineFeatures(path: String): Seq[Feature[LineString, Map[String,Object]]] =
  readLineFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[LineString]` yields a
 * geometry; all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `Feature` per kept feature, carrying that feature's `attributeMap`
 */
def readLineFeatures(path: String, charSet: Charset): Seq[Feature[LineString, Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    line    <- feature.geom[LineString]
  } yield Feature(line, feature.attributeMap)

def readLineFeatures[D](path: String, dataField: String): Seq[Feature[LineString, D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readLineFeatures[D](path: String, dataField: String): Seq[Feature[LineString, D]] =
  readLineFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[LineString]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `Feature` per kept feature, carrying the value of `dataField`
 */
def readLineFeatures[D](path: String, dataField: String, charSet: Charset): Seq[Feature[LineString, D]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    line    <- feature.geom[LineString]
  } yield Feature(line, feature.attribute[D](dataField))

def readLineFeatures(url: URL): Seq[Feature[LineString, Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the LineString features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readLineFeatures(url: URL): Seq[Feature[LineString, Map[String,Object]]] =
  readLineFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[LineString]` yields a
 * geometry; all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `Feature` per kept feature, carrying that feature's `attributeMap`
 */
def readLineFeatures(url: URL, charSet: Charset): Seq[Feature[LineString, Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    line    <- feature.geom[LineString]
  } yield Feature(line, feature.attributeMap)

def readLineFeatures[D](url: URL, dataField: String): Seq[Feature[LineString, D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readLineFeatures[D](url: URL, dataField: String): Seq[Feature[LineString, D]] =
  readLineFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[LineString]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `Feature` per kept feature, carrying the value of `dataField`
 */
def readLineFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[Feature[LineString, D]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    line    <- feature.geom[LineString]
  } yield Feature(line, feature.attribute[D](dataField))

def readPolygonFeatures(path: String): Seq[PolygonFeature[Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the Polygon features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readPolygonFeatures(path: String): Seq[PolygonFeature[Map[String,Object]]] =
  readPolygonFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[Polygon]` yields a
 * geometry; all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `PolygonFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readPolygonFeatures(path: String, charSet: Charset): Seq[PolygonFeature[Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    polygon <- feature.geom[Polygon]
  } yield PolygonFeature(polygon, feature.attributeMap)

def readPolygonFeatures[D](path: String, dataField: String): Seq[PolygonFeature[D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readPolygonFeatures[D](path: String, dataField: String): Seq[PolygonFeature[D]] =
  readPolygonFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[Polygon]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `PolygonFeature` per kept feature, carrying the value of `dataField`
 */
def readPolygonFeatures[D](path: String, dataField: String, charSet: Charset): Seq[PolygonFeature[D]] =
  for {
    feature <- readSimpleFeatures(path, charSet)
    polygon <- feature.geom[Polygon]
  } yield PolygonFeature(polygon, feature.attribute[D](dataField))

def readPolygonFeatures(url: URL): Seq[PolygonFeature[Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the Polygon features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readPolygonFeatures(url: URL): Seq[PolygonFeature[Map[String,Object]]] =
  readPolygonFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[Polygon]` yields a
 * geometry; all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `PolygonFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readPolygonFeatures(url: URL, charSet: Charset): Seq[PolygonFeature[Map[String,Object]]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    polygon <- feature.geom[Polygon]
  } yield PolygonFeature(polygon, feature.attributeMap)

def readPolygonFeatures[D](url: URL, dataField: String): Seq[PolygonFeature[D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readPolygonFeatures[D](url: URL, dataField: String): Seq[PolygonFeature[D]] =
  readPolygonFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[Polygon]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `PolygonFeature` per kept feature, carrying the value of `dataField`
 */
def readPolygonFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[PolygonFeature[D]] =
  for {
    feature <- readSimpleFeatures(url, charSet)
    polygon <- feature.geom[Polygon]
  } yield PolygonFeature(polygon, feature.attribute[D](dataField))

def readMultiPointFeatures(path: String): Seq[MultiPointFeature[Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the MultiPoint features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readMultiPointFeatures(path: String): Seq[MultiPointFeature[Map[String,Object]]] =
  readMultiPointFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiPoint]` yields a
 * geometry; all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `MultiPointFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiPointFeatures(path: String, charSet: Charset): Seq[MultiPointFeature[Map[String,Object]]] =
  for {
    feature    <- readSimpleFeatures(path, charSet)
    multiPoint <- feature.geom[MultiPoint]
  } yield MultiPointFeature(multiPoint, feature.attributeMap)

def readMultiPointFeatures[D](path: String, dataField: String): Seq[MultiPointFeature[D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiPointFeatures[D](path: String, dataField: String): Seq[MultiPointFeature[D]] =
  readMultiPointFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiPoint]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `MultiPointFeature` per kept feature, carrying the value of `dataField`
 */
def readMultiPointFeatures[D](path: String, dataField: String, charSet: Charset): Seq[MultiPointFeature[D]] =
  for {
    feature    <- readSimpleFeatures(path, charSet)
    multiPoint <- feature.geom[MultiPoint]
  } yield MultiPointFeature(multiPoint, feature.attribute[D](dataField))

def readMultiPointFeatures(url: URL): Seq[MultiPointFeature[Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the MultiPoint features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readMultiPointFeatures(url: URL): Seq[MultiPointFeature[Map[String,Object]]] =
  readMultiPointFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiPoint]` yields a
 * geometry; all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `MultiPointFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiPointFeatures(url: URL, charSet: Charset): Seq[MultiPointFeature[Map[String,Object]]] =
  for {
    feature    <- readSimpleFeatures(url, charSet)
    multiPoint <- feature.geom[MultiPoint]
  } yield MultiPointFeature(multiPoint, feature.attributeMap)

def readMultiPointFeatures[D](url: URL, dataField: String): Seq[MultiPointFeature[D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiPointFeatures[D](url: URL, dataField: String): Seq[MultiPointFeature[D]] =
  readMultiPointFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiPoint]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `MultiPointFeature` per kept feature, carrying the value of `dataField`
 */
def readMultiPointFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[MultiPointFeature[D]] =
  for {
    feature    <- readSimpleFeatures(url, charSet)
    multiPoint <- feature.geom[MultiPoint]
  } yield MultiPointFeature(multiPoint, feature.attribute[D](dataField))

def readMultiLineFeatures(path: String): Seq[Feature[MultiLineString, Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the MultiLineString features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readMultiLineFeatures(path: String): Seq[Feature[MultiLineString, Map[String,Object]]] =
  readMultiLineFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiLineString]` yields a
 * geometry; all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `Feature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiLineFeatures(path: String, charSet: Charset): Seq[Feature[MultiLineString, Map[String,Object]]] =
  for {
    feature   <- readSimpleFeatures(path, charSet)
    multiLine <- feature.geom[MultiLineString]
  } yield Feature(multiLine, feature.attributeMap)

def readMultiLineFeatures[D](path: String, dataField: String): Seq[Feature[MultiLineString, D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiLineFeatures[D](path: String, dataField: String): Seq[Feature[MultiLineString, D]] =
  readMultiLineFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiLineString]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `Feature` per kept feature, carrying the value of `dataField`
 */
def readMultiLineFeatures[D](path: String, dataField: String, charSet: Charset): Seq[Feature[MultiLineString, D]] =
  for {
    feature   <- readSimpleFeatures(path, charSet)
    multiLine <- feature.geom[MultiLineString]
  } yield Feature(multiLine, feature.attribute[D](dataField))

def readMultiLineFeatures(url: URL): Seq[Feature[MultiLineString, Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the MultiLineString features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readMultiLineFeatures(url: URL): Seq[Feature[MultiLineString, Map[String,Object]]] =
  readMultiLineFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiLineString]` yields a
 * geometry; all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `Feature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiLineFeatures(url: URL, charSet: Charset): Seq[Feature[MultiLineString, Map[String,Object]]] =
  for {
    feature   <- readSimpleFeatures(url, charSet)
    multiLine <- feature.geom[MultiLineString]
  } yield Feature(multiLine, feature.attributeMap)

def readMultiLineFeatures[D](url: URL, dataField: String): Seq[Feature[MultiLineString, D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiLineFeatures[D](url: URL, dataField: String): Seq[Feature[MultiLineString, D]] =
  readMultiLineFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiLineString]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `Feature` per kept feature, carrying the value of `dataField`
 */
def readMultiLineFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[Feature[MultiLineString, D]] =
  for {
    feature   <- readSimpleFeatures(url, charSet)
    multiLine <- feature.geom[MultiLineString]
  } yield Feature(multiLine, feature.attribute[D](dataField))

def readMultiPolygonFeatures(path: String): Seq[MultiPolygonFeature[Map[String,Object]]] =
readSimpleFeatures(path)
/** Reads the MultiPolygon features of the shapefile at `path`, using `DEFAULT_CHARSET` as the charset. */
def readMultiPolygonFeatures(path: String): Seq[MultiPolygonFeature[Map[String,Object]]] =
  readMultiPolygonFeatures(path, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiPolygon]` yields a
 * geometry; all other features are dropped.
 *
 * @param path    file system path of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `MultiPolygonFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiPolygonFeatures(path: String, charSet: Charset): Seq[MultiPolygonFeature[Map[String,Object]]] =
  for {
    feature      <- readSimpleFeatures(path, charSet)
    multiPolygon <- feature.geom[MultiPolygon]
  } yield MultiPolygonFeature(multiPolygon, feature.attributeMap)

def readMultiPolygonFeatures[D](path: String, dataField: String): Seq[MultiPolygonFeature[D]] =
readSimpleFeatures(path)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiPolygonFeatures[D](path: String, dataField: String): Seq[MultiPolygonFeature[D]] =
  readMultiPolygonFeatures(path, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `path` and keeps the features for which `geom[MultiPolygon]` yields a geometry.
 *
 * @param path      file system path of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `MultiPolygonFeature` per kept feature, carrying the value of `dataField`
 */
def readMultiPolygonFeatures[D](path: String, dataField: String, charSet: Charset): Seq[MultiPolygonFeature[D]] =
  for {
    feature      <- readSimpleFeatures(path, charSet)
    multiPolygon <- feature.geom[MultiPolygon]
  } yield MultiPolygonFeature(multiPolygon, feature.attribute[D](dataField))

def readMultiPolygonFeatures(url: URL): Seq[MultiPolygonFeature[Map[String,Object]]] =
readSimpleFeatures(url)
/** Reads the MultiPolygon features of the shapefile at `url`, using `DEFAULT_CHARSET` as the charset. */
def readMultiPolygonFeatures(url: URL): Seq[MultiPolygonFeature[Map[String,Object]]] =
  readMultiPolygonFeatures(url, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiPolygon]` yields a
 * geometry; all other features are dropped.
 *
 * @param url     location of the shapefile
 * @param charSet charset forwarded to `readSimpleFeatures`
 * @return one `MultiPolygonFeature` per kept feature, carrying that feature's `attributeMap`
 */
def readMultiPolygonFeatures(url: URL, charSet: Charset): Seq[MultiPolygonFeature[Map[String,Object]]] =
  for {
    feature      <- readSimpleFeatures(url, charSet)
    multiPolygon <- feature.geom[MultiPolygon]
  } yield MultiPolygonFeature(multiPolygon, feature.attributeMap)

def readMultiPolygonFeatures[D](url: URL, dataField: String): Seq[MultiPolygonFeature[D]] =
readSimpleFeatures(url)
/** Same as the three-argument overload, using `DEFAULT_CHARSET` as the charset. */
def readMultiPolygonFeatures[D](url: URL, dataField: String): Seq[MultiPolygonFeature[D]] =
  readMultiPolygonFeatures(url, dataField, DEFAULT_CHARSET)

/**
 * Reads the shapefile at `url` and keeps the features for which `geom[MultiPolygon]` yields a geometry.
 *
 * @param url       location of the shapefile
 * @param dataField name of the attribute used as the feature data
 * @param charSet   charset forwarded to `readSimpleFeatures`
 * @tparam D type the attribute value is read as, via `attribute[D]`
 * @return one `MultiPolygonFeature` per kept feature, carrying the value of `dataField`
 */
def readMultiPolygonFeatures[D](url: URL, dataField: String, charSet: Charset): Seq[MultiPolygonFeature[D]] =
  for {
    feature      <- readSimpleFeatures(url, charSet)
    multiPolygon <- feature.geom[MultiPolygon]
  } yield MultiPolygonFeature(multiPolygon, feature.attribute[D](dataField))
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package geotrellis.shapefile

import java.net.URL
import java.nio.charset.Charset

import geotrellis.vector._

import org.scalatest.matchers.should.Matchers
Expand All @@ -31,5 +34,18 @@ class ShapeFileReaderSpec extends AnyFunSpec with Matchers {
data.keys.toSeq should be (Seq("LowIncome", "gbcode", "ename", "WorkingAge", "TotalPop", "Employment"))
}
}

// https://github.com/locationtech/geotrellis/issues/3445
it("should read UTF-8 MultiPolygons feature attributes") {
  val shpPath = "shapefile/data/shapefiles/demographics-utf8/demographics.shp"
  val expectedNames = Seq("南关街道", "七里烟香", "谢庄镇", "Cheng Guan Zhen")

  // Reading with an explicit UTF-8 charset must reproduce the attribute values exactly.
  val utf8Features = ShapeFileReader.readMultiPolygonFeatures(shpPath, Charset.forName("UTF-8"))
  utf8Features.size should be (160)

  val utf8Names = utf8Features.take(4).map(_.data("ename").asInstanceOf[String])
  utf8Names shouldBe expectedNames

  // The overload without a charset is expected to yield different strings for the same file.
  val defaultFeatures = ShapeFileReader.readMultiPolygonFeatures(shpPath)
  val defaultNames = defaultFeatures.take(4).map(_.data("ename").asInstanceOf[String])
  defaultNames should not be expectedNames
}
}
}

0 comments on commit 04dc66c

Please sign in to comment.