From cda83f8b9fbb3f4259ab13114f0b4f3724f42eeb Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Thu, 1 Aug 2024 16:06:23 -0700
Subject: [PATCH] Make GGML threads spawn 10x faster

This change causes GGML threads to be recycled. That way there's less of
a gap between predictions in the tracing diagram. It has the most impact
on smaller models like TinyLLaMA, where I'm seeing a ~15% boost in tokens
per second when generating text. The new llamafiler embeddings server is
going ~25% faster, handling ~1000 requests per second on my workstation.
This change also boosts my TinyLLaMA prefill speed ~20%, by letting CONT
be multi-threaded, which Slaren apparently discovered upstream last month.
---
 llama.cpp/ggml.c                  |  34 +++--
 llamafile/BUILD.mk                |  28 ++--
 llamafile/core_manager.cpp        |   6 +-
 llamafile/llamafile.c             |   2 +-
 llamafile/pool.cpp                | 212 ++++++++++++++++++++++++++++++
 llamafile/pool.h                  |  15 +++
 llamafile/pool_cancel_test.cpp    |  36 +++++
 llamafile/pool_test.cpp           |  87 ++++++++++++
 llamafile/server/main.cpp         |   4 +-
 llamafile/server/server.cpp       |   8 +-
 llamafile/server/worker.cpp       |   4 +-
 llamafile/tinyblas_cpu_mixmul.inc |   1 +
 llamafile/zipalign.c              |  12 +-
 13 files changed, 406 insertions(+), 43 deletions(-)
 create mode 100644 llamafile/pool.cpp
 create mode 100644 llamafile/pool.h
 create mode 100644 llamafile/pool_cancel_test.cpp
 create mode 100644 llamafile/pool_test.cpp

diff --git a/llama.cpp/ggml.c b/llama.cpp/ggml.c
index fc306a8f2f..519c25deae 100644
--- a/llama.cpp/ggml.c
+++ b/llama.cpp/ggml.c
@@ -44,6 +44,7 @@ SOFTWARE.");
#include "llamafile/thread.h"
#include "llamafile/crash.h"
#include "llamafile/trace.h"
+#include "llamafile/pool.h"
#include
#include
@@ -1651,7 +1652,7 @@ struct ggml_compute_state_shared {
    void* abort_callback_data;
};

-typedef pthread_t ggml_thread_t;
+typedef llamafile_task_t ggml_thread_t;

struct ggml_compute_state {
    _Atomic(ggml_thread_t) thrd;
@@ -13302,6 +13303,7 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
    dims[1] = MIN(n_dims - 1, end);
}

+__target_clones("avx2") // [jart]
static void ggml_compute_forward_rope_f32(
        const struct ggml_compute_params * params,
        struct ggml_tensor * dst,
@@ -18355,10 +18357,11 @@ typedef int ggml_lock_t;

#define GGML_LOCK_INITIALIZER 0

-typedef pthread_t ggml_thread_t;
+typedef llamafile_task_t ggml_thread_t;

-#define ggml_thread_create llamafile_thread_create // [jart]
-#define ggml_thread_join pthread_join
+#define ggml_thread_create llamafile_task_create // [jart]
+#define ggml_thread_cancel llamafile_task_cancel
+#define ggml_thread_join llamafile_task_join

#else

@@ -18382,10 +18385,11 @@ typedef int ggml_lock_t;

#define GGML_LOCK_INITIALIZER 0

-typedef pthread_t ggml_thread_t;
+typedef llamafile_task_t ggml_thread_t;

-#define ggml_thread_create pthread_create
-#define ggml_thread_join pthread_join
+#define ggml_thread_create llamafile_task_create // [jart]
+#define ggml_thread_cancel llamafile_task_cancel
+#define ggml_thread_join llamafile_task_join

#endif

@@ -18484,6 +18488,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
    switch (node->op) {
        case GGML_OP_CPY:
        case GGML_OP_DUP:
+        case GGML_OP_CONT: // [jart] don't move me
        case GGML_OP_ADD:
        case GGML_OP_ADD1:
        case GGML_OP_ACC:
@@ -18568,7 +18573,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
            } break;
        case GGML_OP_SCALE:
        case GGML_OP_SET:
-        case GGML_OP_CONT:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
@@ -19185,10 +19189,10 @@ static void ggml_compute_canceled(void *arg) {
    struct ggml_compute_cleanup *cleanup = arg;
    clear_numa_thread_affinity();
    for (int j = 1; j < cleanup->n_threads; j++) {
-        pthread_t t;
+        ggml_thread_t t;
        if ((t = atomic_exchange_explicit(&cleanup->workers[j].thrd, 0, memory_order_relaxed))) {
-            pthread_cancel(t);
+            ggml_thread_cancel(t);
            const int rc = ggml_thread_join(t, NULL);
            GGML_ASSERT(rc == 0);
        }
@@ -19241,14 +19245,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
                .is_main_thread = false, // [jart]
            };
-            pthread_attr_t attr;
-            pthread_attr_init(&attr);
-            pthread_attr_setstacksize(&attr, 128 * 1024);
-            pthread_attr_setguardsize(&attr, sysconf(_SC_PAGESIZE));
-            pthread_attr_setsigaltstacksize_np(&attr, sysconf(_SC_MINSIGSTKSZ) + 16384);
-            const int rc = ggml_thread_create((pthread_t *)&workers[j].thrd, &attr,
+            const int rc = ggml_thread_create((ggml_thread_t *)&workers[j].thrd,
                                               ggml_graph_compute_thread, &workers[j]);
-            pthread_attr_destroy(&attr);
            GGML_ASSERT(rc == 0);
            UNUSED(rc);
        }
@@ -19276,7 +19274,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
    int cs;
    pthread_setcancelstate(PTHREAD_CANCEL_MASKED, &cs);
    for (int j = 1; j < n_threads; j++) {
-        pthread_t t;
+        ggml_thread_t t;
        if ((t = atomic_exchange_explicit(&workers[j].thrd, 0, memory_order_relaxed))) {
            const int rc = ggml_thread_join(t, NULL);
diff --git a/llamafile/BUILD.mk b/llamafile/BUILD.mk
index 0e599dbb18..86dae40a73 100644
--- a/llamafile/BUILD.mk
+++ b/llamafile/BUILD.mk
@@ -42,14 +42,16 @@ o/$(MODE)/llamafile/tokenize: \
	o/$(MODE)/llama.cpp/llama.cpp.a

.PHONY: o/$(MODE)/llamafile
-o/$(MODE)/llamafile: \
-	$(LLAMAFILE_OBJS) \
-	o/$(MODE)/llamafile/server \
-	o/$(MODE)/llamafile/simple \
-	o/$(MODE)/llamafile/zipalign \
-	o/$(MODE)/llamafile/zipcheck \
-	o/$(MODE)/llamafile/tokenize \
-	o/$(MODE)/llamafile/addnl \
+o/$(MODE)/llamafile: \
+	$(LLAMAFILE_OBJS) \
+	o/$(MODE)/llamafile/server \
+	o/$(MODE)/llamafile/simple \
+	o/$(MODE)/llamafile/zipalign \
+	o/$(MODE)/llamafile/zipcheck \
+	o/$(MODE)/llamafile/tokenize \
+	o/$(MODE)/llamafile/addnl \
+	o/$(MODE)/llamafile/pool_test.runs \
+	o/$(MODE)/llamafile/pool_cancel_test.runs \

################################################################################
# microarchitectures
@@ -141,6 +143,16 @@ o/$(MODE)/llamafile/tinyblas_cpu_sgemm_arm82.o: \

################################################################################
# testing

+o/$(MODE)/llamafile/pool_test: \
+	o/$(MODE)/llamafile/pool_test.o \
+	o/$(MODE)/llamafile/crash.o \
+	o/$(MODE)/llamafile/pool.o \
+
+o/$(MODE)/llamafile/pool_cancel_test: \
+	o/$(MODE)/llamafile/pool_cancel_test.o \
+	o/$(MODE)/llamafile/crash.o \
+	o/$(MODE)/llamafile/pool.o \
+
o/$(MODE)/llamafile/thread_test: \
	o/$(MODE)/llamafile/thread_test.o \
	o/$(MODE)/llamafile/crash.o \
diff --git a/llamafile/core_manager.cpp b/llamafile/core_manager.cpp
index 05bbf65531..d7627c0fd8 100644
--- a/llamafile/core_manager.cpp
+++ b/llamafile/core_manager.cpp
@@ -36,8 +36,8 @@ static void unlock_mutex(void *arg) {
}

int CoreManager::acquire(int need, int greed) {
-    unassert(need >= 1);
-    unassert(greed >= need);
+    npassert(need >= 1);
+    npassert(greed >= need);

    int got = 0;

@@ -80,5 +80,5 @@ void CoreManager::release(int count) {
    }
    pthread_cond_signal(&cv_);
    pthread_mutex_unlock(&mu_);
-    unassert(ok);
+    npassert(ok);
}
diff --git a/llamafile/llamafile.c b/llamafile/llamafile.c
index a98755f05f..161d31155d 100644
--- a/llamafile/llamafile.c
+++ b/llamafile/llamafile.c
@@ -330,7 +330,7 @@ size_t llamafile_tell(struct llamafile *file) {
    if (!file->fp)
        return file->position;
    long ret = ftell(file->fp);
-    unassert(ret != -1); // shouldn't fail because we seeked earlier
+    npassert(ret != -1); // shouldn't fail because we seeked earlier
    return (size_t)ret;
}
diff --git a/llamafile/pool.cpp b/llamafile/pool.cpp
new file mode 100644
index 0000000000..0fc5ce426b
--- /dev/null
+++ b/llamafile/pool.cpp
@@ -0,0 +1,212 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pool.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "threadlocal.h"
+
+struct llamafile_thread;
+static void llamafile_thread_canceled(llamafile_thread *);
+static ThreadLocal g_key(llamafile_thread_canceled);
+
+struct llamafile_task {
+    pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
+    pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+    void *(*func)(void *);
+    void *arg;
+    void *res;
+    pthread_t th = -1;
+};
+
+struct llamafile_thread {
+    pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
+    pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+    llamafile_task *task;
+    llamafile_thread *next;
+    pthread_t th;
+};
+
+static atomic_int g_active;
+static _Atomic(llamafile_thread *) g_idle;
+
+static void unlock_mutex(void *arg) {
+    pthread_mutex_t *mu = (pthread_mutex_t *)arg;
+    pthread_mutex_unlock(mu);
+}
+
+static void idle_push(llamafile_thread *thread) {
+    int backoff = 0;
+    thread->next = atomic_load_explicit(&g_idle, memory_order_relaxed);
+    while (!atomic_compare_exchange_weak_explicit(&g_idle, &thread->next, thread,
+                                                  memory_order_acq_rel, memory_order_relaxed))
+        backoff = pthread_delay_np(&g_idle, backoff);
+}
+
+static llamafile_thread *idle_pop(void) {
+    int backoff = 0;
+    llamafile_thread *thread;
+    for (;;) {
+        if ((thread = atomic_load_explicit(&g_idle, memory_order_relaxed))) {
+            if (atomic_compare_exchange_weak_explicit(&g_idle, &thread, thread->next,
+                                                      memory_order_acq_rel, memory_order_relaxed))
+                return thread;
+            backoff = pthread_delay_np(g_idle, backoff);
+        } else {
+            return nullptr;
+        }
+    }
+}
+
+static void cancel_task(llamafile_task *task) {
+    pthread_mutex_lock(&task->mu);
+    task->res = PTHREAD_CANCELED;
+    task->th = 0;
+    pthread_cond_signal(&task->cv);
+    pthread_mutex_unlock(&task->mu);
+}
+
+static void llamafile_thread_canceled(llamafile_thread *thread) {
+    thread->th = 0;
+    cancel_task(thread->task);
+    delete thread;
+    --g_active;
+}
+
+static void *llamafile_thread_worker(void *arg) {
+    errno_t err;
+    llamafile_thread *thread = (llamafile_thread *)arg;
+
+    ++g_active;
+    g_key.set(thread);
+    do {
+        void *res = thread->task->func(thread->task->arg);
+        pthread_setcancelstate(PTHREAD_CANCEL_MASKED, 0);
+
+        pthread_mutex_lock(&thread->task->mu);
+        thread->task->res = res;
+        thread->task->th = 0;
+        pthread_cond_signal(&thread->task->cv);
+        pthread_mutex_unlock(&thread->task->mu);
+
+        pthread_cleanup_push(unlock_mutex, &thread->mu);
+        pthread_mutex_lock(&thread->mu);
+        thread->task = nullptr;
+        idle_push(thread);
+        while (!thread->task) {
+            err = pthread_cond_wait(&thread->cv, &thread->mu);
+            if (err == ECANCELED)
+                break;
+        }
+        pthread_cleanup_pop(true);
+        pthread_setcancelstate(PTHREAD_CANCEL_DEFERRED, 0);
+    } while (err != ECANCELED);
+
+    if (thread->task)
+        cancel_task(thread->task);
+
+    thread->th = 0;
+    g_key.set(nullptr);
+    delete thread;
+    --g_active;
+
+    return 0;
+}
+
+static errno_t llamafile_thread_create(llamafile_task *task) {
+    llamafile_thread *thread = new llamafile_thread;
+    thread->task = task;
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, 128 * 1024);
+    pthread_attr_setguardsize(&attr, sysconf(_SC_PAGESIZE));
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+    pthread_attr_setsigaltstacksize_np(&attr, sysconf(_SC_MINSIGSTKSZ) + 16384);
+    errno_t err = pthread_create(&thread->th, &attr, llamafile_thread_worker, thread);
+    pthread_attr_destroy(&attr);
+    if (!err) {
+        task->th = thread->th;
+    } else {
+        delete thread;
+    }
+    return err;
+}
+
+errno_t llamafile_task_create(llamafile_task **out_task, void *(*func)(void *), void *arg) {
+    llamafile_task *task = new llamafile_task;
+    task->func = func;
+    task->arg = arg;
+    errno_t err;
+    llamafile_thread *thread;
+    if ((thread = idle_pop())) {
+        pthread_mutex_lock(&thread->mu);
+        thread->task = task;
+        task->th = thread->th;
+        pthread_cond_signal(&thread->cv);
+        pthread_mutex_unlock(&thread->mu);
+        err = 0;
+    } else {
+        err = llamafile_thread_create(task);
+    }
+    if (!err) {
+        *out_task = task;
+    } else {
+        delete task;
+    }
+    return err;
+}
+
+errno_t llamafile_task_join(llamafile_task *task, void **out_res) {
+    pthread_cleanup_push(unlock_mutex, &task->mu);
+    pthread_mutex_lock(&task->mu);
+    while (task->th)
+        pthread_cond_wait(&task->cv, &task->mu);
+    pthread_cleanup_pop(true);
+    if (out_res)
+        *out_res = task->res;
+    delete task;
+    return 0;
+}
+
+errno_t llamafile_task_cancel(llamafile_task *task) {
+    errno_t err = 0;
+    if (task->th)
+        err = pthread_cancel(task->th);
+    return err;
+}
+
+void llamafile_task_shutdown(void) {
+    llamafile_thread *thread;
+    while ((thread = idle_pop()))
+        if (thread->th)
+            pthread_cancel(thread->th);
+    int backoff = 0;
+    while (g_active)
+        backoff = pthread_delay_np(&g_idle, backoff);
+}
+
+static struct llamafile_tasks {
+    ~llamafile_tasks(void) {
+        llamafile_task_shutdown();
+    }
+} g_tasks;
diff --git a/llamafile/pool.h b/llamafile/pool.h
new file mode 100644
index 0000000000..22d047787b
--- /dev/null
+++ b/llamafile/pool.h
@@ -0,0 +1,15 @@
+#pragma once
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct llamafile_task *llamafile_task_t;
+
+errno_t llamafile_task_create(llamafile_task_t *, void *(*)(void *), void *);
+errno_t llamafile_task_join(llamafile_task_t, void **);
+errno_t llamafile_task_cancel(llamafile_task_t);
+void llamafile_task_shutdown(void);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/llamafile/pool_cancel_test.cpp b/llamafile/pool_cancel_test.cpp
new file mode 100644
index 0000000000..bd52177dd9
--- /dev/null
+++ b/llamafile/pool_cancel_test.cpp
@@ -0,0 +1,36 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pool.h"
+
+#include
+#include
+#include
+
+void *waiter(void *arg) {
+    pause();
+    return 0;
+}
+
+int main(int argc, char *argv[]) {
+    ShowCrashReports();
+    llamafile_task_t task;
+    npassert(!llamafile_task_create(&task, waiter, 0));
+    npassert(!llamafile_task_cancel(task));
+    npassert(!llamafile_task_join(task, 0));
+    CheckForMemoryLeaks();
+}
diff --git a/llamafile/pool_test.cpp b/llamafile/pool_test.cpp
new file mode 100644
index 0000000000..148835e3c1
--- /dev/null
+++ b/llamafile/pool_test.cpp
@@ -0,0 +1,87 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pool.h"
+
+#include
+#include
+#include
+#include
+
+#define BENCHMARK(ITERATIONS, WORK_PER_RUN, CODE)                                               \
+    do {                                                                                        \
+        struct timespec start = timespec_real();                                                \
+        for (int __i = 0; __i < ITERATIONS; ++__i) {                                            \
+            asm volatile("" ::: "memory");                                                      \
+            CODE;                                                                               \
+        }                                                                                       \
+        long long work = ((WORK_PER_RUN) ? (WORK_PER_RUN) : 1) * (ITERATIONS);                  \
+        double nanos =                                                                          \
+            (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / (double)work; \
+        if (nanos < 1000) {                                                                     \
+            kprintf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE);                           \
+        } else {                                                                                \
+            kprintf("%10lld ns %2dx %s\n", (long long)nanos, (ITERATIONS), #CODE);              \
+        }                                                                                       \
+    } while (0)
+
+void *noop(void *arg) {
+    return arg;
+}
+
+void run_task() {
+    llamafile_task_t task;
+    npassert(!llamafile_task_create(&task, noop, 0));
+    npassert(!llamafile_task_join(task, 0));
+}
+
+void run_thread() {
+    pthread_t task;
+    npassert(!pthread_create(&task, 0, noop, 0));
+    npassert(!pthread_join(task, 0));
+}
+
+#define N 20
+
+void run_many_tasks() {
+    llamafile_task_t task[N];
+    for (int i = 0; i < N; ++i)
+        npassert(!llamafile_task_create(&task[i], noop, 0));
+    for (int i = 0; i < N; ++i)
+        npassert(!llamafile_task_join(task[i], 0));
+}
+
+void run_many_threads() {
+    pthread_t task[N];
+    for (int i = 0; i < N; ++i)
+        npassert(!pthread_create(&task[i], 0, noop, 0));
+    for (int i = 0; i < N; ++i)
+        npassert(!pthread_join(task[i], 0));
+}
+
+int main(int argc, char *argv[]) {
+    ShowCrashReports();
+    run_many_tasks();
+    BENCHMARK(10, 1, run_task());
+    BENCHMARK(10, 1, run_thread());
+    BENCHMARK(10, N, run_many_tasks());
+    BENCHMARK(10, N, run_many_threads());
+    llamafile_task_shutdown();
+    while (!pthread_orphan_np())
+        pthread_decimate_np();
+    CheckForMemoryLeaks();
+}
diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp
index 1740cb55ea..9f7055de6b 100644
--- a/llamafile/server/main.cpp
+++ b/llamafile/server/main.cpp
@@ -20,6 +20,7 @@
#include "llama.cpp/llama.h"
#include "llamafile/llamafile.h"
+#include "llamafile/pool.h"
#include "llamafile/version.h"
#include "log.h"
@@ -75,7 +76,7 @@ main(int argc, char* argv[])
    set_thread_name("server");
    g_server = new Server(create_listening_socket(FLAG_listen));
    for (int i = 0; i < FLAG_workers; ++i)
-        unassert(!g_server->spawn());
+        npassert(!g_server->spawn());

    // run server
    signals_init();
@@ -94,6 +95,7 @@ main(int argc, char* argv[])
    SLOG("exit");

    // quality assurance
+    llamafile_task_shutdown();
    while (!pthread_orphan_np())
        pthread_decimate_np();
    CheckForMemoryLeaks();
diff --git a/llamafile/server/server.cpp b/llamafile/server/server.cpp
index bf9fa248f9..d3cf36aca4 100644
--- a/llamafile/server/server.cpp
+++ b/llamafile/server/server.cpp
@@ -38,10 +38,10 @@ Server::Server(int fd) : fd(fd)
Server::~Server()
{
-    unassert(fd == -1);
-    unassert(!worker_count.load(std::memory_order_relaxed));
-    unassert(dll_is_empty(active_workers));
-    unassert(dll_is_empty(idle_workers));
+    npassert(fd == -1);
+    npassert(!worker_count.load(std::memory_order_relaxed));
+    npassert(dll_is_empty(active_workers));
+    npassert(dll_is_empty(idle_workers));
    pthread_mutex_destroy(&lock_);
    pthread_cond_destroy(&cond_);
}
diff --git a/llamafile/server/worker.cpp b/llamafile/server/worker.cpp
index e7d0dd3e78..2750225f80 100644
--- a/llamafile/server/worker.cpp
+++ b/llamafile/server/worker.cpp
@@ -42,7 +42,7 @@ Worker::kill()
void
Worker::begin()
{
-    unassert(!working);
+    npassert(!working);
    server->lock();
    dll_remove(&server->idle_workers, &elem);
    if (dll_is_empty(server->idle_workers)) {
@@ -60,7 +60,7 @@ Worker::begin()
void
Worker::end()
{
-    unassert(working);
+    npassert(working);
    server->lock();
    dll_remove(&server->active_workers, &elem);
    working = false;
diff --git a/llamafile/tinyblas_cpu_mixmul.inc b/llamafile/tinyblas_cpu_mixmul.inc
index eb61f93c89..f897bdafd5 100644
--- a/llamafile/tinyblas_cpu_mixmul.inc
+++ b/llamafile/tinyblas_cpu_mixmul.inc
@@ -288,6 +288,7 @@ class MixMul {
        case GGML_TASK_TYPE_COMPUTE:
            assert(!(cols % BS));
            assert(!(weights->nb[1] % sizeof(TA)));
+            // TODO(jart): parallelize this loop
            for (int expert = 0; expert < experts; ++expert) {
                BLAS tb{cols / BS,
                        (const TA *)((const char *)weights->data + expert * weights->nb[2]),
diff --git a/llamafile/zipalign.c b/llamafile/zipalign.c
index 3ef8d6c623..7dca10a087 100644
--- a/llamafile/zipalign.c
+++ b/llamafile/zipalign.c
@@ -312,7 +312,7 @@ int main(int argc, char *argv[]) {
        case Z_MEM_ERROR:
            DieOom();
        default:
-            unassert(!"deflateInit2() called with invalid parameters");
+            npassert(!"deflateInit2() called with invalid parameters");
        }
    }
@@ -345,7 +345,7 @@ int main(int argc, char *argv[]) {
            case Z_MEM_ERROR:
                DieOom();
            case Z_STREAM_ERROR:
-                unassert(!"deflate() stream error");
+                npassert(!"deflate() stream error");
            default:
                break;
            }
@@ -357,7 +357,7 @@ int main(int argc, char *argv[]) {
        }
    }
    if (flag_level)
-        unassert(deflateEnd(&zs) == Z_OK);
+        npassert(deflateEnd(&zs) == Z_OK);

    // write local file header
    uint8_t *lochdr = Malloc(hdrlen);
@@ -381,7 +381,7 @@ int main(int argc, char *argv[]) {
        p = ZIP_WRITE64(p, size);       // uncompressed size
        p = ZIP_WRITE64(p, compsize);   // compressed size
    }
-    unassert(p == lochdr + hdrlen);
+    npassert(p == lochdr + hdrlen);
    if (pwrite(zfd, lochdr, hdrlen, zsize) != hdrlen)
        DieSys(zpath);
    free(lochdr);
@@ -418,7 +418,7 @@ int main(int argc, char *argv[]) {
        p = ZIP_WRITE64(p, size);       // uncompressed size
        p = ZIP_WRITE64(p, compsize);   // compressed size
        p = ZIP_WRITE64(p, zsize);      // lfile offset
-    unassert(p == cdirhdr + hdrlen);
+    npassert(p == cdirhdr + hdrlen);

    // finish up
    ++cnt;
@@ -461,7 +461,7 @@ int main(int argc, char *argv[]) {
    p = ZIP_WRITE32(p, cdirsize);     // size of central directory
    p = ZIP_WRITE32(p, 0xffffffffu);  // offset of central directory
    p = ZIP_WRITE16(p, 0);            // comment length
-    unassert(p == eocd + sizeof(eocd));
+    npassert(p == eocd + sizeof(eocd));
    if (pwrite(zfd, eocd, sizeof(eocd), zsize + cdirsize) != sizeof(eocd))
        DieSys(zpath);
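
Editor's note (not part of the commit): below is a minimal usage sketch of the task pool API this patch adds in llamafile/pool.h, to show how repeated create/join cycles reuse parked threads instead of calling pthread_create() each time, which is the mechanism behind the speedup described in the commit message. The worker function, the loop counts, and the include path are illustrative assumptions; the example also assumes the Cosmopolitan toolchain used by llamafile (for errno_t) and linking against llamafile/pool.o.

```c
// Sketch: driving the recycled-thread API from llamafile/pool.h.
// double_it() and the round/task counts are made up for demonstration.
#include <stdio.h>
#include "llamafile/pool.h"

static void *double_it(void *arg) { // hypothetical worker function
    long *x = (long *)arg;
    *x *= 2;
    return x;
}

int main(void) {
    long vals[8];
    llamafile_task_t tasks[8];

    // The first round spawns real threads. After llamafile_task_join(),
    // each worker parks itself on the idle list, so later rounds hand the
    // next function to an existing thread — the same fast path that
    // ggml_graph_compute() now takes between predictions.
    for (int round = 0; round < 3; ++round) {
        for (int i = 0; i < 8; ++i) {
            vals[i] = i;
            if (llamafile_task_create(&tasks[i], double_it, &vals[i]))
                return 1; // errno_t-style nonzero error code
        }
        for (int i = 0; i < 8; ++i) {
            void *res;
            if (llamafile_task_join(tasks[i], &res))
                return 1;
            printf("round %d: task %d -> %ld\n", round, i, *(long *)res);
        }
    }

    // Reap parked threads; the pool also does this from a static destructor.
    llamafile_task_shutdown();
    return 0;
}
```

As in pthread_join(), joining a task also releases it, so each llamafile_task_t is joined exactly once; llamafile_task_cancel() before the join forwards to pthread_cancel() on the backing thread, which is how ggml_compute_canceled() tears workers down early.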