Skip to content

Commit

Permalink
Merge pull request flintlib#1729 from flintlib/sdpoly
Browse files Browse the repository at this point in the history
Speed up constructing Swinnerton-Dyer polynomials using deflation hack
  • Loading branch information
fredrik-johansson authored Jan 19, 2024
2 parents 1c91ea9 + e98807c commit 0a001dd
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 42 deletions.
53 changes: 21 additions & 32 deletions examples/swinnerton_dyer_poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,9 @@
#include "profiler.h"
#include "ca.h"
#include "ca_vec.h"
#include "ca_poly.h"
#include "ulong_extras.h"

void
_ca_poly_mullow(ca_ptr res, ca_srcptr x, slong xlen,
ca_srcptr y, slong ylen, slong len, ca_ctx_t ctx)
{
slong i, j;
ca_t t;

for (i = 0; i < len; i++)
ca_zero(res + i, ctx);

ca_init(t, ctx);

for (i = 0; i < xlen; i++)
{
for (j = 0; j < FLINT_MIN(ylen, len - i); j++)
{
ca_mul(t, x + i, y + j, ctx);
ca_add(res + i + j, res + i + j, t, ctx);
}
}

ca_clear(t, ctx);
}

void
swinnerton_dyer_poly(ca_ptr T, ulong n, slong trunc, ca_ctx_t ctx)
{
Expand All @@ -44,15 +21,15 @@ swinnerton_dyer_poly(ca_ptr T, ulong n, slong trunc, ca_ctx_t ctx)
ca_one(one, ctx);

square_roots = _ca_vec_init(n, ctx);
tmp1 = flint_malloc((N/2 + 1) * sizeof(ca_struct));
tmp2 = flint_malloc((N/2 + 1) * sizeof(ca_struct));
tmp3 = _ca_vec_init(N, ctx);
tmp1 = flint_malloc((N / 4 + 1) * sizeof(ca_struct));
tmp2 = flint_malloc((N / 4 + 1) * sizeof(ca_struct));
tmp3 = _ca_vec_init(N / 2, ctx);

for (i = 0; i < n; i++)
ca_sqrt_ui(square_roots + i, n_nth_prime(i + 1), ctx);

/* Build linear factors */
for (i = 0; i < N; i++)
/* Build deflated quadratic factors */
for (i = 0; i < N / 2; i++)
{
ca_zero(T + i, ctx);

Expand All @@ -63,14 +40,17 @@ swinnerton_dyer_poly(ca_ptr T, ulong n, slong trunc, ca_ctx_t ctx)
else
ca_sub(T + i, T + i, square_roots + j, ctx);
}

ca_sqr(T + i, T + i, ctx);
ca_neg(T + i, T + i, ctx);
}

/* For each level... */
for (i = 0; i < n; i++)
for (i = 0; i < n - 1; i++)
{
slong stride = UWORD(1) << i;

for (j = 0; j < N; j += 2*stride)
for (j = 0; j < N / 2; j += 2*stride)
{
for (k = 0; k < stride; k++)
{
Expand All @@ -87,11 +67,20 @@ swinnerton_dyer_poly(ca_ptr T, ulong n, slong trunc, ca_ctx_t ctx)
}
}

/* Inflate */
for (i = N - 2; i >= 0; i -= 2)
{
if (i < trunc)
ca_swap(T + i, T + i / 2, ctx);
if (i + 1 < trunc)
ca_zero(T + i + 1, ctx);
}

ca_one(T + N, ctx);
_ca_vec_clear(square_roots, n, ctx);
flint_free(tmp1);
flint_free(tmp2);
_ca_vec_clear(tmp3, UWORD(1) << n, ctx);
_ca_vec_clear(tmp3, N / 2, ctx);
ca_clear(one, ctx);
}

Expand Down
32 changes: 22 additions & 10 deletions src/arb_poly/swinnerton_dyer_ui.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2015 Fredrik Johansson
Copyright (C) 2015, 2024 Fredrik Johansson
This file is part of FLINT.
Expand Down Expand Up @@ -65,15 +65,15 @@ _arb_poly_swinnerton_dyer_ui(arb_ptr T, ulong n, slong trunc, slong prec)
arb_one(one);

square_roots = _arb_vec_init(n);
tmp1 = flint_malloc((N/2 + 1) * sizeof(arb_struct));
tmp2 = flint_malloc((N/2 + 1) * sizeof(arb_struct));
tmp3 = _arb_vec_init(N);
tmp1 = flint_malloc((N / 4 + 1) * sizeof(arb_struct));
tmp2 = flint_malloc((N / 4 + 1) * sizeof(arb_struct));
tmp3 = _arb_vec_init(N / 2);

for (i = 0; i < n; i++)
arb_sqrt_ui(square_roots + i, n_nth_prime(i + 1), prec);

/* Build linear factors */
for (i = 0; i < N; i++)
/* Build deflated quadratic factors */
for (i = 0; i < N / 2; i++)
{
arb_zero(T + i);

Expand All @@ -84,14 +84,17 @@ _arb_poly_swinnerton_dyer_ui(arb_ptr T, ulong n, slong trunc, slong prec)
else
arb_sub(T + i, T + i, square_roots + j, prec);
}

arb_sqr(T + i, T + i, prec);
arb_neg(T + i, T + i);
}

/* For each level... */
for (i = 0; i < n; i++)
for (i = 0; i < n - 1; i++)
{
slong stride = UWORD(1) << i;

for (j = 0; j < N; j += 2*stride)
for (j = 0; j < N / 2; j += 2 * stride)
{
for (k = 0; k < stride; k++)
{
Expand All @@ -104,15 +107,24 @@ _arb_poly_swinnerton_dyer_ui(arb_ptr T, ulong n, slong trunc, slong prec)

_arb_poly_mullow(tmp3, tmp1, stride + 1, tmp2, stride + 1,
FLINT_MIN(2 * stride, trunc), prec);
_arb_vec_set(T + j, tmp3, FLINT_MIN(2 * stride, trunc));
_arb_vec_swap(T + j, tmp3, FLINT_MIN(2 * stride, trunc));
}
}

/* Inflate */
for (i = N - 2; i >= 0; i -= 2)
{
if (i < trunc)
arb_swap(T + i, T + i / 2);
if (i + 1 < trunc)
arb_zero(T + i + 1);
}

arb_one(T + N);
_arb_vec_clear(square_roots, n);
flint_free(tmp1);
flint_free(tmp2);
_arb_vec_clear(tmp3, UWORD(1) << n);
_arb_vec_clear(tmp3, N / 2);
arb_clear(one);
}

Expand Down

0 comments on commit 0a001dd

Please sign in to comment.