diff --git a/.gitignore b/.gitignore index 9600d7334fd30..93cc3542a596c 100644 --- a/.gitignore +++ b/.gitignore @@ -73,7 +73,7 @@ dist dist/ *.spec - +*.kcpps zig-out/ zig-cache/ diff --git a/ExtStuff.txt b/ExtStuff.txt deleted file mode 100644 index 66b9f139cbed5..0000000000000 --- a/ExtStuff.txt +++ /dev/null @@ -1 +0,0 @@ -smoothing_factor=0.5 \ No newline at end of file diff --git a/common/common.h b/common/common.h index 8dc76421c1ce9..0577cb36b3f6d 100644 --- a/common/common.h +++ b/common/common.h @@ -86,12 +86,12 @@ struct gpt_params { int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate + float randomization_factor = 0.0f; + float smoothing_factor = 0.0f; // DynaTemp! float dynatemp_range = 0.0f; // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range float dynatemp_exponent = 1.0f; - float randomization_factor = 0.0f; - float smoothing_factor = 0.0f; // // sampling parameters struct llama_sampling_params sparams; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 771edc2d5be1c..293bb0e6e168b 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1483,6 +1483,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o generation_finished = true; return output; } + concat_output_mtx.lock(); concat_output = ""; concat_output_reader_copy = ""; @@ -1528,6 +1529,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o kcpp_params->mirostat_tau = inputs.mirostat_tau; kcpp_params->dynatemp_range = inputs.dynatemp_range; kcpp_params->dynatemp_exponent = inputs.dynatemp_exponent; + kcpp_params->randomization_factor = inputs.randomization_factor; kcpp_params->smoothing_factor = inputs.smoothing_factor; kcpp_params->n_ctx = inputs.max_context_length; kcpp_params->n_batch = n_batch; @@ -1926,6 +1928,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o const float tfs_z = kcpp_params->tfs_z; const float dynatemp_range = kcpp_params->dynatemp_range; const float dynatemp_exponent = kcpp_params->dynatemp_exponent; + const float randomization_factor = kcpp_params->randomization_factor; + const float smoothing_factor = kcpp_params->smoothing_factor; if (!startedsampling) { @@ -1981,7 +1985,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty, top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng, - kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range, dynatemp_exponent, kcpp_params->randomization_factor, kcpp_params->smoothing_factor); + kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range, dynatemp_exponent, randomization_factor, smoothing_factor); if (grammar != nullptr) { grammar_accept_token(file_format, n_vocab, grammar, id); diff --git a/koboldcpp.py b/koboldcpp.py index 6562797dceb4a..361d8e65e40b0 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -81,6 +81,7 @@ class generation_inputs(ctypes.Structure): ("quiet", ctypes.c_bool), ("dynatemp_range", ctypes.c_float), ("dynatemp_exponent", ctypes.c_float), + ("randomization_factor", ctypes.c_float), ("smoothing_factor", ctypes.c_float), ("logit_biases", logit_bias * logit_bias_max)] @@ -329,7 +330,7 @@ def load_model(model_filename): ret = handle.load_model(inputs) return ret -def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}): +def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, randomization_factor=0.0, smoothing_factor=0.0, logit_biases={}): global maxctx, args, currentusergenkey, totalgens, pendingabortkey inputs = generation_inputs() outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs)) @@ -359,6 +360,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu inputs.quiet = quiet inputs.dynatemp_range = dynatemp_range inputs.dynatemp_exponent = dynatemp_exponent + inputs.randomization_factor = randomization_factor inputs.smoothing_factor = smoothing_factor inputs.grammar = grammar.encode("UTF-8") inputs.grammar_retain_state = grammar_retain_state @@ -556,7 +558,6 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat messages_string += assistant_message_start genparams["prompt"] = messages_string - return generate( prompt=genparams.get('prompt', ""), memory=genparams.get('memory', ""), @@ -587,6 +588,7 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat quiet=is_quiet, dynatemp_range=genparams.get('dynatemp_range', 0.0), dynatemp_exponent=genparams.get('dynatemp_exponent', 1.0), + randomization_factor=genparams.get('randomization_factor', 0.0), smoothing_factor=genparams.get('smoothing_factor', 0.0), logit_biases=genparams.get('logit_bias', {}) )