[SPARK-18076][CORE][SQL] Fix default Locale used in DateFormat, NumberFormat to Locale.US

## What changes were proposed in this pull request?

Use `Locale.US` explicitly in all usages of `DateFormat` and `NumberFormat`, rather than relying on the platform default locale.
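
For motivation, here is an illustrative sketch (not part of this patch) of how locale-sensitive formatting diverges across JVM default locales:

```scala
import java.text.NumberFormat
import java.util.Locale

object LocaleDemo {
  def main(args: Array[String]): Unit = {
    val n = 1234567.89
    // US conventions: "1,234,567.89"
    println(NumberFormat.getInstance(Locale.US).format(n))
    // German conventions: "1.234.567,89" (grouping and decimal separators swap)
    println(NumberFormat.getInstance(Locale.GERMANY).format(n))
  }
}
```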
## How was this patch tested?

Existing tests.

Author: Sean Owen <[email protected]>

Closes apache#15610 from srowen/SPARK-18076.
srowen authored and uzadude committed Jan 27, 2017
1 parent 6acf195 commit ce618e3
Showing 31 changed files with 103 additions and 96 deletions.
8 changes: 4 additions & 4 deletions core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
@@ -20,7 +20,7 @@ package org.apache.spark
import java.io.IOException
import java.text.NumberFormat
import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}

import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
@@ -67,12 +67,12 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable {

def setup(jobid: Int, splitid: Int, attemptid: Int) {
setIDs(jobid, splitid, attemptid)
-HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss").format(now),
+HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(now),
jobid, splitID, attemptID, conf.value)
}

def open() {
-val numfmt = NumberFormat.getInstance()
+val numfmt = NumberFormat.getInstance(Locale.US)
numfmt.setMinimumIntegerDigits(5)
numfmt.setGroupingUsed(false)
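
Note (illustrative, not part of the diff): the formatter configured above yields fixed-width, grouping-free split numbers, so output part files sort lexicographically:

```scala
import java.text.NumberFormat
import java.util.Locale

val numfmt = NumberFormat.getInstance(Locale.US)
numfmt.setMinimumIntegerDigits(5)
numfmt.setGroupingUsed(false)
// "part-00012": five ASCII digits, never a grouped form such as "00,012"
val partName = "part-" + numfmt.format(12)
```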

@@ -162,7 +162,7 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable {
private[spark]
object SparkHadoopWriter {
def createJobID(time: Date, id: Int): JobID = {
-val formatter = new SimpleDateFormat("yyyyMMddHHmmss")
+val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
val jobtrackerID = formatter.format(time)
new JobID(jobtrackerID, id)
}
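
Note (illustrative): pinning Locale.US matters even for a digits-only pattern; under some default locales (for example Thai, whose default calendar can be Buddhist) the rendered year itself changes:

```scala
import java.text.SimpleDateFormat
import java.util.{Date, Locale}

val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
// e.g. "20170127093045": Gregorian year and ASCII digits on every platform
val jobtrackerID = formatter.format(new Date())
```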
@@ -21,7 +21,7 @@ import java.io.IOException
import java.lang.reflect.Method
import java.security.PrivilegedExceptionAction
import java.text.DateFormat
-import java.util.{Arrays, Comparator, Date}
+import java.util.{Arrays, Comparator, Date, Locale}

import scala.collection.JavaConverters._
import scala.util.control.NonFatal
@@ -357,7 +357,7 @@ class SparkHadoopUtil extends Logging {
* @return a printable string value.
*/
private[spark] def tokenToString(token: Token[_ <: TokenIdentifier]): String = {
-val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT)
+val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT, Locale.US)
val buffer = new StringBuilder(128)
buffer.append(token.toString)
try {
@@ -18,7 +18,7 @@
package org.apache.spark.deploy.master

import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}
import java.util.concurrent.{ScheduledFuture, TimeUnit}

import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
@@ -51,7 +51,8 @@ private[deploy] class Master(

private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)

-private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
+// For application IDs
+private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)

private val WORKER_TIMEOUT_MS = conf.getLong("spark.worker.timeout", 60) * 1000
private val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)
@@ -20,7 +20,7 @@ package org.apache.spark.deploy.worker
import java.io.File
import java.io.IOException
import java.text.SimpleDateFormat
-import java.util.{Date, UUID}
+import java.util.{Date, Locale, UUID}
import java.util.concurrent._
import java.util.concurrent.{Future => JFuture, ScheduledFuture => JScheduledFuture}

@@ -68,7 +68,7 @@ private[deploy] class Worker(
ThreadUtils.newDaemonSingleThreadExecutor("worker-cleanup-thread"))

// For worker and executor IDs
-private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
+private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
// Send a heartbeat every (heartbeat timeout) / 4 milliseconds
private val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4

5 changes: 3 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -19,7 +19,7 @@ package org.apache.spark.rdd

import java.io.IOException
import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}

import scala.collection.immutable.Map
import scala.reflect.ClassTag
@@ -243,7 +243,8 @@ class HadoopRDD[K, V](

var reader: RecordReader[K, V] = null
val inputFormat = getInputFormat(jobConf)
-HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss").format(createTime),
+HadoopRDD.addLocalConfiguration(
+  new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(createTime),
context.stageId, theSplit.index, context.attemptNumber, jobConf)
reader = inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL)

4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -19,7 +19,7 @@ package org.apache.spark.rdd

import java.io.IOException
import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}

import scala.reflect.ClassTag

@@ -79,7 +79,7 @@ class NewHadoopRDD[K, V](
// private val serializableConf = new SerializableWritable(_conf)

private val jobTrackerId: String = {
-val formatter = new SimpleDateFormat("yyyyMMddHHmmss")
+val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
formatter.format(new Date())
}

@@ -19,7 +19,7 @@ package org.apache.spark.rdd

import java.nio.ByteBuffer
import java.text.SimpleDateFormat
-import java.util.{Date, HashMap => JHashMap}
+import java.util.{Date, HashMap => JHashMap, Locale}

import scala.collection.{mutable, Map}
import scala.collection.JavaConverters._
@@ -1079,7 +1079,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
// Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038).
val hadoopConf = conf
val job = NewAPIHadoopJob.getInstance(hadoopConf)
-val formatter = new SimpleDateFormat("yyyyMMddHHmmss")
+val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
val jobtrackerID = formatter.format(new Date())
val stageId = self.id
val jobConfiguration = job.getConfiguration
@@ -21,7 +21,7 @@ import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
-import java.util.{Calendar, SimpleTimeZone}
+import java.util.{Calendar, Locale, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}
@@ -86,7 +86,7 @@ private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

private[spark] object JacksonMessageWriter {
def makeISODateFormat: SimpleDateFormat = {
-val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
+val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
iso8601.setCalendar(cal)
iso8601
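
A self-contained usage sketch mirroring makeISODateFormat above (illustrative):

```scala
import java.text.SimpleDateFormat
import java.util.{Calendar, Date, Locale, SimpleTimeZone}

val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
// The calendar is pinned to GMT, so an instant renders identically everywhere
iso8601.setCalendar(Calendar.getInstance(new SimpleTimeZone(0, "GMT")))
iso8601.format(new Date(0L))  // "1970-01-01T00:00:00.000GMT"
```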
@@ -17,20 +17,20 @@
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
-import java.util.TimeZone
+import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

val timestamp: Long = {
-val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
+val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
try {
format.parse(originalValue).getTime()
} catch {
case _: ParseException =>
-val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
+val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
try {
gmtDay.parse(originalValue).getTime()
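
For illustration (hypothetical input values), the two shapes the parameter accepts, mirroring the fallback above:

```scala
import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}

// Full timestamp form, tried first
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
  .parse("2015-02-10T23:59:59.999GMT").getTime
// Date-only fallback, interpreted as midnight GMT
val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
gmtDay.parse("2015-02-10").getTime
```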
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -36,7 +36,8 @@ private[spark] object UIUtils extends Logging {

// SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use.
private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
-override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
+override def initialValue(): SimpleDateFormat =
+  new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US)
}

def formatDate(date: Date): String = dateFormat.get.format(date)
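
A hedged sketch of the hazard the ThreadLocal guards against: one SimpleDateFormat shared across threads races on its internal Calendar and can silently corrupt output:

```scala
import java.text.SimpleDateFormat
import java.util.{Date, Locale}

val shared = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US)
val threads = (1 to 4).map { _ =>
  new Thread(new Runnable {
    // Concurrent format() calls mutate the same internal state
    override def run(): Unit = (1 to 1000).foreach(_ => shared.format(new Date()))
  })
}
threads.foreach(_.start())
threads.foreach(_.join())
// The ThreadLocal above avoids this by giving each thread its own instance
```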
@@ -18,7 +18,7 @@
package org.apache.spark.util.logging

import java.text.SimpleDateFormat
-import java.util.Calendar
+import java.util.{Calendar, Locale}

import org.apache.spark.internal.Logging

@@ -59,7 +59,7 @@ private[spark] class TimeBasedRollingPolicy(
}

@volatile private var nextRolloverTime = calculateNextRolloverTime()
-private val formatter = new SimpleDateFormat(rollingFileSuffixPattern)
+private val formatter = new SimpleDateFormat(rollingFileSuffixPattern, Locale.US)

/** Should rollover if current time has exceeded next rollover time */
def shouldRollover(bytesToBeWritten: Long): Boolean = {
@@ -109,7 +109,7 @@ private[spark] class SizeBasedRollingPolicy(
}

@volatile private var bytesWrittenSinceRollover = 0L
val formatter = new SimpleDateFormat("--yyyy-MM-dd--HH-mm-ss--SSSS")
val formatter = new SimpleDateFormat("--yyyy-MM-dd--HH-mm-ss--SSSS", Locale.US)

/** Should rollover if the next set of bytes is going to exceed the size limit */
def shouldRollover(bytesToBeWritten: Long): Boolean = {
2 changes: 1 addition & 1 deletion core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -264,7 +264,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
val hour = minute * 60
def str: (Long) => String = Utils.msDurationToString(_)

-val sep = new DecimalFormatSymbols(Locale.getDefault()).getDecimalSeparator()
+val sep = new DecimalFormatSymbols(Locale.US).getDecimalSeparator

assert(str(123) === "123 ms")
assert(str(second) === "1" + sep + "0 s")
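
The separator is looked up because it is locale-dependent; for example (illustrative):

```scala
import java.text.DecimalFormatSymbols
import java.util.Locale

new DecimalFormatSymbols(Locale.US).getDecimalSeparator      // '.'
new DecimalFormatSymbols(Locale.FRANCE).getDecimalSeparator  // ','
```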
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.rest.mesos

import java.io.File
import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}
import java.util.concurrent.atomic.AtomicLong
import javax.servlet.http.HttpServletResponse

@@ -62,11 +62,10 @@ private[mesos] class MesosSubmitRequestServlet(
private val DEFAULT_CORES = 1.0

private val nextDriverNumber = new AtomicLong(0)
-private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
-private def newDriverId(submitDate: Date): String = {
-  "driver-%s-%04d".format(
-    createDateFormat.format(submitDate), nextDriverNumber.incrementAndGet())
-}
+// For application IDs
+private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
+private def newDriverId(submitDate: Date): String =
+  f"driver-${createDateFormat.format(submitDate)}-${nextDriverNumber.incrementAndGet()}%04d"

/**
* Build a driver description from the fields specified in the submit request.
@@ -18,7 +18,7 @@
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{Date, Locale}

import scala.beans.BeanProperty

@@ -34,7 +34,7 @@ private[mllib] trait PMMLModelExport {
val version = getClass.getPackage.getImplementationVersion
val app = new Application("Apache Spark MLlib").setVersion(version)
val timestamp = new Timestamp()
-.addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
+.addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
val header = new Header()
.setApplication(app)
.setTimestamp(timestamp)
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions

import java.text.SimpleDateFormat
-import java.util.{Calendar, TimeZone}
+import java.util.{Calendar, Locale, TimeZone}

import scala.util.Try

@@ -331,7 +331,7 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType)

override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
-val sdf = new SimpleDateFormat(format.toString)
+val sdf = new SimpleDateFormat(format.toString, Locale.US)
UTF8String.fromString(sdf.format(new java.util.Date(timestamp.asInstanceOf[Long] / 1000)))
}

@@ -400,7 +400,7 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {

private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: SimpleDateFormat =
-Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
+Try(new SimpleDateFormat(constFormat.toString, Locale.US)).getOrElse(null)

override def eval(input: InternalRow): Any = {
val t = left.eval(input)
@@ -425,7 +425,7 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
null
} else {
val formatString = f.asInstanceOf[UTF8String].toString
-Try(new SimpleDateFormat(formatString).parse(
+Try(new SimpleDateFormat(formatString, Locale.US).parse(
t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
}
}
@@ -520,7 +520,7 @@ case class FromUnixTime(sec: Expression, format: Expression)

private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: SimpleDateFormat =
-Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
+Try(new SimpleDateFormat(constFormat.toString, Locale.US)).getOrElse(null)

override def eval(input: InternalRow): Any = {
val time = left.eval(input)
@@ -539,9 +539,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
if (f == null) {
null
} else {
-Try(UTF8String.fromString(new SimpleDateFormat(
-  f.asInstanceOf[UTF8String].toString).format(new java.util.Date(
-    time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
+Try(
+  UTF8String.fromString(new SimpleDateFormat(f.toString, Locale.US).
+    format(new java.util.Date(time.asInstanceOf[Long] * 1000L)))
+).getOrElse(null)
}
}
}
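
An illustrative round trip mirroring unix_timestamp and from_unixtime under the pinned locale:

```scala
import java.text.SimpleDateFormat
import java.util.Locale

val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
val seconds = 1485510000L
val rendered = fmt.format(new java.util.Date(seconds * 1000L))
// Parsing the rendered string recovers the original epoch seconds
assert(fmt.parse(rendered).getTime / 1000L == seconds)
```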
@@ -1415,7 +1415,7 @@ case class Sentences(
val locale = if (languageStr != null && countryStr != null) {
new Locale(languageStr.toString, countryStr.toString)
} else {
-Locale.getDefault
+Locale.US
}
getSentences(string.asInstanceOf[UTF8String].toString, locale)
}
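
A sketch of locale-sensitive sentence splitting, assuming a BreakIterator-based implementation behind getSentences (an assumption; that helper is not shown in this diff):

```scala
import java.text.BreakIterator
import java.util.Locale

val it = BreakIterator.getSentenceInstance(Locale.US)
it.setText("Hi there. How are you?")
// Boundary offsets fall at 0, 10, and 22, splitting the two sentences
```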
@@ -17,6 +17,8 @@

package org.apache.spark.sql.catalyst.json

+import java.util.Locale

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.commons.lang3.time.FastDateFormat

@@ -56,11 +58,11 @@ private[sql] class JSONOptions(

// Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe.
val dateFormat: FastDateFormat =
FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"))
FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"), Locale.US)

val timestampFormat: FastDateFormat =
FastDateFormat.getInstance(
parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ"))
parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), Locale.US)

// Parse mode flags
if (!ParseModes.isValidMode(parseMode)) {
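
Unlike SimpleDateFormat, FastDateFormat is immutable and thread-safe, so a single instance can be shared across tasks without a ThreadLocal (illustrative):

```scala
import java.util.{Date, Locale}
import org.apache.commons.lang3.time.FastDateFormat

val ts = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.US)
// Safe to call concurrently from multiple threads
val rendered = ts.format(new Date())
```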
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util

import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}
-import java.util.{Calendar, TimeZone}
+import java.util.{Calendar, Locale, TimeZone}
import javax.xml.bind.DatatypeConverter

import scala.annotation.tailrec
@@ -79,14 +79,14 @@ object DateTimeUtils {
// `SimpleDateFormat` is not thread-safe.
val threadLocalTimestampFormat = new ThreadLocal[DateFormat] {
override def initialValue(): SimpleDateFormat = {
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
}
}

// `SimpleDateFormat` is not thread-safe.
private val threadLocalDateFormat = new ThreadLocal[DateFormat] {
override def initialValue(): SimpleDateFormat = {
new SimpleDateFormat("yyyy-MM-dd")
new SimpleDateFormat("yyyy-MM-dd", Locale.US)
}
}
