Skip to content

Commit

Permalink
Add copy_strings feature
Browse files Browse the repository at this point in the history
Some users of Jiffy have experienced issues when decoding large JSON
documents. Normally Jiffy expects smallish documents and returns any
strings as sub-binaries. When dealing with large documents these
sub-binary references can keep a large amount of RAM around unless the
user goes through and applies `binary:copy/1` on every string returned
from Jiffy. This however causes a large amount of CPU usage to do
something that Jiffy could do as it builds the JSON structure.

The `copy_strings` decoder option does exactly this. Instead of
returning sub-binaries Jiffy now copies every string into a newly
allocated binary. Users report that this fixes the memory issues while
also not negatively affecting performance significantly.
  • Loading branch information
davisp committed Nov 6, 2017
1 parent 7602dee commit dddb392
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ The options for decode are:
will ensure that the parsed object only contains a single entry
containing the last value seen. This mirrors the parsing behavior
of virtually every other JSON parser.
* `copy_strings` - Normally when strings are decoded they are created
as sub-binaries of the input data. With some workloads this can lead
to an undesirable bloating of memory when a few small strings in JSON
keep a reference to the full JSON document alive. Setting this option
will instead allocate new binaries for each string to avoid keeping
the original JSON document around after garbage collection.
* `{bytes_per_red, N}` where N >= 0 - This controls the number of
bytes that Jiffy will process as an equivalent to a reduction. Each
20 reductions we consume 1% of our allocated time slice for the current
Expand Down
11 changes: 10 additions & 1 deletion c_src/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ typedef struct {
int return_maps;
int return_trailer;
int dedupe_keys;
int copy_strings;
ERL_NIF_TERM null_term;

char* p;
Expand Down Expand Up @@ -84,6 +85,7 @@ dec_new(ErlNifEnv* env)
d->return_maps = 0;
d->return_trailer = 0;
d->dedupe_keys = 0;
d->copy_strings = 0;
d->null_term = d->atoms->atom_null;

d->p = NULL;
Expand Down Expand Up @@ -291,9 +293,14 @@ dec_string(Decoder* d, ERL_NIF_TERM* value)
return 0;

parse:
if(!has_escape) {
if(!has_escape && !d->copy_strings) {
*value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
return 1;
} else if(!has_escape) {
ulen = d->i - 1 - st;
chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
memcpy(chrbuf, &(d->p[st]), ulen);
return 1;
}

hi = 0;
Expand Down Expand Up @@ -684,6 +691,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
d->return_trailer = 1;
} else if(enif_compare(val, d->atoms->atom_dedupe_keys) == 0) {
d->dedupe_keys = 1;
} else if(enif_compare(val, d->atoms->atom_copy_strings) == 0) {
d->copy_strings = 1;
} else if(enif_compare(val, d->atoms->atom_use_nil) == 0) {
d->null_term = d->atoms->atom_nil;
} else if(get_null_term(env, val, &(d->null_term))) {
Expand Down
1 change: 1 addition & 0 deletions c_src/jiffy.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_null_term = make_atom(env, "null_term");
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
st->atom_copy_strings = make_atom(env, "copy_strings");

// Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$");
Expand Down
1 change: 1 addition & 0 deletions c_src/jiffy.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct {
ERL_NIF_TERM atom_null_term;
ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM atom_copy_strings;

ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array;
Expand Down
35 changes: 35 additions & 0 deletions test/jiffy_17_copy_strings_tests.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.

-module(jiffy_17_copy_strings_tests).

-include_lib("eunit/include/eunit.hrl").


% Walks a decoded term and returns `true` only when every binary found in
% it has the same size as the binary it references, i.e. it is not a
% sub-binary of some larger binary.
%
% Accepted shapes:
%   {Props}  - a 1-tuple wrapping a list of {Key, Value} pairs; both the
%              key and the value of every pair are checked.
%   List     - every element is checked.
%   Binary   - byte_size/1 is compared with binary:referenced_byte_size/1.
%   Other    - anything else contains no binary at this level and passes.
check_binaries({Props}) when is_list(Props) ->
    lists:all(
        fun({Key, Value}) ->
            check_binaries(Key) andalso check_binaries(Value)
        end,
        Props
    );
check_binaries(Values) when is_list(Values) ->
    lists:all(fun check_binaries/1, Values);
check_binaries(Bin) when is_binary(Bin) ->
    Size = byte_size(Bin),
    Referenced = binary:referenced_byte_size(Bin),
    % Diagnostic output so a failing case shows which string was still a
    % sub-binary; one line per binary.
    io:format("~s :: ~p ~p~n", [Bin, Size, Referenced]),
    Size =:= Referenced;
check_binaries(_Other) ->
    true.


% EUnit generator: decodes each sample document with the `copy_strings`
% option and asserts that check_binaries/1 accepts the decoded term.
% Decoding runs while the generator is evaluated; only the assertion
% itself is deferred to test execution.
copy_strings_test_() ->
    DecodeOpts = [copy_strings],
    Docs = [
        <<"\"foo\"">>,
        <<"[\"bar\"]">>,
        <<"{\"foo\":\"bar\"}">>,
        <<"{\"foo\":[\"bar\"]}">>
    ],
    Tests = [
        begin
            Decoded = jiffy:decode(Doc, DecodeOpts),
            ?_assert(check_binaries(Decoded))
        end
     || Doc <- Docs
    ],
    {"Test copy_strings", Tests}.

0 comments on commit dddb392

Please sign in to comment.