diff --git a/include/finufft_opts.h b/include/finufft_opts.h index 372bbbb6f..d8334ffba 100644 --- a/include/finufft_opts.h +++ b/include/finufft_opts.h @@ -12,6 +12,7 @@ typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_o int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order // 1 FFT-style mode order int chkbnds; // [DEPRECATED] 0 don't check NU pts in [-3pi,3pi), 1 do ( &ker1, // --------- batch helper functions for t1,2 exec: --------------------------- template -static int spreadinterpSortedBatch(int batchSize, FINUFFT_PLAN_T *p, - std::complex *cBatch) +static int spreadinterpSortedBatch(int batchSize, FINUFFT_PLAN_T *p, std::complex *fwBatch, std::complex *cBatch) /* Spreads (or interpolates) a batch of batchSize strength vectors in cBatch to (or from) the batch of fine working grids p->fwBatch, using the same set of @@ -447,7 +446,7 @@ static int spreadinterpSortedBatch(int batchSize, FINUFFT_PLAN_T *p, #endif #pragma omp parallel for num_threads(nthr_outer) for (int i = 0; i < batchSize; i++) { - std::complex *fwi = p->fwBatch.data() + i * p->nf; // start of i'th fw array in + std::complex *fwi = fwBatch + i * p->nf; // start of i'th fw array in // wkspace std::complex *ci = cBatch + i * p->nj; // start of i'th c array in cBatch spreadinterpSorted(p->sortIndices, p->nf1, p->nf2, p->nf3, (T *)fwi, p->nj, p->X, @@ -530,6 +529,7 @@ void finufft_default_opts_t(finufft_opts *o) o->spread_kerpad = 1; o->upsampfac = 0.0; o->spread_thread = 0; + o->spreadinterponly = 0; o->maxbatchsize = 0; o->spread_nthr_atomic = -1; o->spread_max_sp_size = 0; @@ -560,8 +560,9 @@ FINUFFT_PLAN_T::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i printf("[%s] new plan: FINUFFT version " FINUFFT_VER " .................\n", __func__); - fftPlan = std::make_unique>( - opts.fftw_lock_fun, opts.fftw_unlock_fun, opts.fftw_lock_data); + if (!opts.spreadinterponly) // Dont make plans if only spread or interpolate + fftPlan = std::make_unique>( + opts.fftw_lock_fun, opts.fftw_unlock_fun, opts.fftw_lock_data); if ((type != 1) && (type != 2) && (type != 3)) { fprintf(stderr, "[%s] Invalid type (%d), should be 1, 2 or 3.\n", __func__, type); @@ -668,66 +669,72 @@ FINUFFT_PLAN_T::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i __func__, (double)(EPSILON * mu)); } - // determine fine grid sizes, sanity check.. int nfier = set_nf_type12(ms, opts, spopts, &nf1); if (nfier) throw nfier; // nf too big; we're done - phiHat1.resize(nf1 / 2 + 1); if (dim > 1) { nfier = set_nf_type12(mt, opts, spopts, &nf2); if (nfier) throw nfier; - phiHat2.resize(nf2 / 2 + 1); } if (dim > 2) { nfier = set_nf_type12(mu, opts, spopts, &nf3); if (nfier) throw nfier; - phiHat3.resize(nf3 / 2 + 1); - } - - if (opts.debug) { // "long long" here is to avoid warnings with printf... - printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) " - "(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d " - "batchSize=%d ", - __func__, dim, type, (long long)ms, (long long)mt, (long long)mu, - (long long)nf1, (long long)nf2, (long long)nf3, ntrans, nthr, batchSize); - if (batchSize == 1) // spread_thread has no effect in this case - printf("\n"); - else - printf(" spread_thread=%d\n", opts.spread_thread); } + if(!opts.spreadinterponly) // We dont need fseries if it is spreadinterponly. + { + // determine fine grid sizes, sanity check.. + phiHat1.resize(nf1 / 2 + 1); + if (dim > 1) { + phiHat2.resize(nf2 / 2 + 1); + } + if (dim > 2) { + phiHat3.resize(nf3 / 2 + 1); + } - // STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim - CNTime timer; - timer.start(); - onedim_fseries_kernel(nf1, phiHat1, spopts); - if (dim > 1) onedim_fseries_kernel(nf2, phiHat2, spopts); - if (dim > 2) onedim_fseries_kernel(nf3, phiHat3, spopts); - if (opts.debug) - printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread, - timer.elapsedsec()); + if (opts.debug) { // "long long" here is to avoid warnings with printf... + printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) " + "(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d " + "batchSize=%d ", + __func__, dim, type, (long long)ms, (long long)mt, (long long)mu, + (long long)nf1, (long long)nf2, (long long)nf3, ntrans, nthr, batchSize); + if (batchSize == 1) // spread_thread has no effect in this case + printf("\n"); + else + printf(" spread_thread=%d\n", opts.spread_thread); + } - nf = nf1 * nf2 * nf3; // fine grid total number of points - if (nf * batchSize > MAX_NF) { - fprintf( - stderr, - "[%s] fwBatch would be bigger than MAX_NF, not attempting memory allocation!\n", - __func__); - throw int(FINUFFT_ERR_MAXNALLOC); + // STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim + CNTime timer; + timer.start(); + onedim_fseries_kernel(nf1, phiHat1, spopts); + if (dim > 1) onedim_fseries_kernel(nf2, phiHat2, spopts); + if (dim > 2) onedim_fseries_kernel(nf3, phiHat3, spopts); + if (opts.debug) + printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread, + timer.elapsedsec()); + + nf = nf1 * nf2 * nf3; // fine grid total number of points + if (nf * batchSize > MAX_NF) { + fprintf( + stderr, + "[%s] fwBatch would be bigger than MAX_NF, not attempting memory allocation!\n", + __func__); + throw int(FINUFFT_ERR_MAXNALLOC); + } + + timer.restart(); + fwBatch.resize(nf * batchSize); // the big workspace + if (opts.debug) + printf("[%s] fwBatch %.2fGB alloc: \t%.3g s\n", __func__, + (double)1E-09 * sizeof(std::complex) * nf * batchSize, + timer.elapsedsec()); + + timer.restart(); // plan the FFTW + const auto ns = gridsize_for_fft(this); + fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft); + if (opts.debug) + printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw, nthr_fft, + timer.elapsedsec()); } - - timer.restart(); - fwBatch.resize(nf * batchSize); // the big workspace - if (opts.debug) - printf("[%s] fwBatch %.2fGB alloc: \t%.3g s\n", __func__, - (double)1E-09 * sizeof(std::complex) * nf * batchSize, - timer.elapsedsec()); - - timer.restart(); // plan the FFTW - const auto ns = gridsize_for_fft(this); - fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft); - if (opts.debug) - printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw, nthr_fft, - timer.elapsedsec()); - } else { // -------------------------- type 3 (no planning) ------------ if (opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans); @@ -1041,28 +1048,33 @@ int FINUFFT_PLAN_T::execute(std::complex *cj, std::complex *fk) { // STEP 1: (varies by type) timer.restart(); if (type == 1) { // type 1: spread NU pts X, weights cj, to fw grid - spreadinterpSortedBatch(thisBatchSize, this, cjb); + spreadinterpSortedBatch(thisBatchSize, this, opts.spreadinterponly? fkb: this->fwBatch.data(), cjb); t_sprint += timer.elapsedsec(); - } else { // type 2: amplify Fourier coeffs fk into 0-padded fw + // Stop here if it is spread interp only. + if (opts.spreadinterponly) + continue; + } else if(!opts.spreadinterponly) { // type 2: amplify Fourier coeffs fk into 0-padded fw, but dont do it if it is spread interp only. deconvolveBatch(thisBatchSize, this, fkb); t_deconv += timer.elapsedsec(); } - - // STEP 2: call the FFT on this batch - timer.restart(); - do_fft(this); - t_fft += timer.elapsedsec(); - if (opts.debug > 1) printf("\tFFT exec:\t\t%.3g s\n", timer.elapsedsec()); - + if (!opts.spreadinterponly) // Do FFT only if its not spread interp only. + { + // STEP 2: call the FFT on this batch + timer.restart(); + do_fft(this); + t_fft += timer.elapsedsec(); + if (opts.debug > 1) printf("\tFFT exec:\t\t%.3g s\n", timer.elapsedsec()); + } // STEP 3: (varies by type) timer.restart(); if (type == 1) { // type 1: deconvolve (amplify) fw and shuffle to fk deconvolveBatch(thisBatchSize, this, fkb); t_deconv += timer.elapsedsec(); } else { // type 2: interpolate unif fw grid to NU target pts - spreadinterpSortedBatch(thisBatchSize, this, cjb); + spreadinterpSortedBatch(thisBatchSize, this, opts.spreadinterponly? fkb: this->fwBatch.data(), cjb); t_sprint += timer.elapsedsec(); } + // Release the fwBatch vector to prevent double freeing of memory. } // ........end b loop if (opts.debug) { // report total times in their natural order... @@ -1113,7 +1125,7 @@ int FINUFFT_PLAN_T::execute(std::complex *cj, std::complex *fk) { // STEP 1: spread c'_j batch (x'_j NU pts) into fw batch grid... timer.restart(); spopts.spread_direction = 1; // spread - spreadinterpSortedBatch(thisBatchSize, this, CpBatch.data()); // X are primed + spreadinterpSortedBatch(thisBatchSize, this, this->fwBatch.data(), CpBatch.data()); // X are primed t_spr += timer.elapsedsec(); // STEP 2: type 2 NUFFT from fw batch to user output fk array batch... diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index f6cf925e0..be1abfeb0 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -2160,7 +2160,7 @@ FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps upsampfac); return FINUFFT_ERR_HORNER_WRONG_BETA; } - if (upsampfac <= 1.0) { // no digits would result + if (upsampfac < 1.0) { // no digits would result fprintf(stderr, "FINUFFT setup_spreader: error, upsampfac=%.3g is <=1.0\n", upsampfac); return FINUFFT_ERR_UPSAMPFAC_TOO_SMALL;