Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat kryo max buffersize #735

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ class KryoSerializer(conf: SparkConf)
with Serializable {

private val bufferSize = conf.getInt("spark.kryoserializer.buffer.mb", 2) * 1024 * 1024
private val maxBufferSize = conf.getInt("spark.kryoserializer.buffer.max.mb", 64) * 1024 * 1024
private val referenceTracking = conf.getBoolean("spark.kryo.referenceTracking", true)
private val registrator = conf.getOption("spark.kryo.registrator")

def newKryoOutput() = new KryoOutput(bufferSize)
def newKryoOutput() = new KryoOutput(bufferSize, math.max(bufferSize, maxBufferSize))

def newKryo(): Kryo = {
val instantiator = new EmptyScalaKryoInstantiator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,36 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext {
}
}

class KryoSerializerResizableOutputSuite extends FunSuite {
  import org.apache.spark.SparkConf
  import org.apache.spark.SparkContext
  import org.apache.spark.LocalSparkContext
  import org.apache.spark.SparkException

  // Found by trial and error: an array of this size does not fit in a 1mb Kryo buffer.
  val x = (1 to 400000).toArray

  // Builds a SparkConf that selects the Kryo serializer with the given initial
  // and maximum buffer sizes (both in megabytes), factoring out the setup that
  // was duplicated across the two tests.
  private def kryoConf(bufferMb: String, maxBufferMb: String): SparkConf = {
    val conf = new SparkConf(false)
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.kryoserializer.buffer.mb", bufferMb)
    conf.set("spark.kryoserializer.buffer.max.mb", maxBufferMb)
    conf
  }

  test("kryo without resizable output buffer should fail on large array") {
    val sc = new SparkContext("local", "test", kryoConf("1", "1"))
    try {
      // With max == initial buffer size the output buffer cannot grow,
      // so serializing the large array must fail.
      intercept[SparkException](sc.parallelize(x).collect)
    } finally {
      // Stop the context even when the assertion fails, so a failing test
      // does not leak an active SparkContext into subsequent tests.
      LocalSparkContext.stop(sc)
    }
  }

  test("kryo with resizable output buffer should succeed on large array") {
    val sc = new SparkContext("local", "test", kryoConf("1", "2"))
    try {
      // A 2mb maximum lets the 1mb buffer grow enough to hold the array.
      assert(sc.parallelize(x).collect === x)
    } finally {
      LocalSparkContext.stop(sc)
    }
  }
}

object KryoTest {
case class CaseClass(i: Int, s: String) {}

Expand Down
15 changes: 11 additions & 4 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,17 @@ Apart from these, the following properties are also available, and may be useful
<td><code>spark.kryoserializer.buffer.mb</code></td>
<td>2</td>
<td>
Maximum object size to allow within Kryo (the library needs to create a buffer at least as
large as the largest single object you'll serialize). Increase this if you get a "buffer limit
exceeded" exception inside Kryo. Note that there will be one buffer <i>per core</i> on each
worker.
Initial size of Kryo's serialization buffer (the buffer must be at least as large as the
largest single object you will serialize, unless the resizable maximum below is set higher).
Note that there will be one buffer <i>per core</i> on each worker.
</td>
</tr>
<tr>
<td><code>spark.kryoserializer.buffer.max.mb</code></td>
<td>64</td>
<td>
Maximum size Kryo's serialization buffer may grow to when an object does not fit in the
initial buffer (growing the buffer has some overhead). Increase this if you get a
"buffer limit exceeded" exception inside Kryo. A value smaller than
<code>spark.kryoserializer.buffer.mb</code> is treated as equal to it.
</td>
</tr>
</table>
Expand Down