Skip to content

Commit

Permalink
mod intra_edge: Backport memory reduction from dav1d 1.2.1 (#830)
Browse files Browse the repository at this point in the history
Eliminate duplicate and constant `intra_edge` table entries
  • Loading branch information
randomPoison authored Mar 19, 2024
2 parents 1b75449 + 1453ee2 commit 57d50de
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 182 deletions.
52 changes: 24 additions & 28 deletions src/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2390,19 +2390,19 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
if (bl == BL_8X8) {
const EdgeTip *const tip = (const EdgeTip *) node;
assert(hsz == 1);
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
const enum Filter2d tl_filter = t->tl_4x4_filter;
t->bx++;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
return -1;
t->bx--;
t->by++;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
return -1;
t->bx++;
t->tl_4x4_filter = tl_filter;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[3]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
return -1;
t->bx--;
t->by--;
Expand Down Expand Up @@ -2435,91 +2435,87 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
}
break;
case PARTITION_T_TOP_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[0]))
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[1]))
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
return -1;
t->bx -= hsz;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, branch->tts[2]))
if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
return -1;
t->by -= hsz;
break;
}
case PARTITION_T_BOTTOM_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, branch->tbs[0]))
if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[1]))
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[2]))
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
return -1;
t->bx -= hsz;
t->by -= hsz;
break;
}
case PARTITION_T_LEFT_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[0]))
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[1]))
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
return -1;
t->by -= hsz;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, branch->tls[2]))
if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
return -1;
t->bx -= hsz;
break;
}
case PARTITION_T_RIGHT_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, branch->trs[0]))
if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[1]))
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[2]))
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
return -1;
t->by -= hsz;
t->bx -= hsz;
break;
}
case PARTITION_H4: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[0]))
if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
return -1;
t->by += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[1]))
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
return -1;
t->by += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[2]))
if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
return -1;
t->by += hsz >> 1;
if (t->by < f->bh)
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[3]))
if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
return -1;
t->by -= hsz * 3 >> 1;
break;
}
case PARTITION_V4: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[0]))
if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
return -1;
t->bx += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[1]))
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
return -1;
t->bx += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[2]))
if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
return -1;
t->bx += hsz >> 1;
if (t->bx < f->bw)
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[3]))
if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
return -1;
t->bx -= hsz * 3 >> 1;
break;
Expand Down
58 changes: 30 additions & 28 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3577,16 +3577,16 @@ unsafe fn decode_sb(
None => {
let tip = intra_edge.tip(sb128, edge_index);
assert!(hsz == 1);
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[0])?;
decode_b(c, t, f, bl, BS_4x4, bp, EdgeFlags::ALL_TR_AND_BL)?;
let tl_filter = t.tl_4x4_filter;
t.bx += 1;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[1])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[0])?;
t.bx -= 1;
t.by += 1;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[2])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[1])?;
t.bx += 1;
t.tl_4x4_filter = tl_filter;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[3])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[2])?;
t.bx -= 1;
t.by -= 1;
if cfg!(target_arch = "x86_64") && t.frame_thread.pass != 0 {
Expand Down Expand Up @@ -3614,68 +3614,70 @@ unsafe fn decode_sb(
}
}
PARTITION_T_TOP_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tts[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TR_AND_BL)?;
t.bx += hsz;
decode_b(c, t, f, bl, b[0], bp, branch.tts[1])?;
decode_b(c, t, f, bl, b[0], bp, node.v[1])?;
t.bx -= hsz;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tts[2])?;
decode_b(c, t, f, bl, b[1], bp, node.h[1])?;
t.by -= hsz;
}
PARTITION_T_BOTTOM_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tbs[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, node.h[0])?;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tbs[1])?;
decode_b(c, t, f, bl, b[1], bp, node.v[0])?;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tbs[2])?;
decode_b(c, t, f, bl, b[1], bp, EdgeFlags::empty())?;
t.bx -= hsz;
t.by -= hsz;
}
PARTITION_T_LEFT_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tls[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TR_AND_BL)?;
t.by += hsz;
decode_b(c, t, f, bl, b[0], bp, branch.tls[1])?;
decode_b(c, t, f, bl, b[0], bp, node.h[1])?;
t.by -= hsz;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tls[2])?;
decode_b(c, t, f, bl, b[1], bp, node.v[1])?;
t.bx -= hsz;
}
PARTITION_T_RIGHT_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.trs[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, node.v[0])?;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.trs[1])?;
decode_b(c, t, f, bl, b[1], bp, node.h[0])?;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, (*branch).trs[2])?;
decode_b(c, t, f, bl, b[1], bp, EdgeFlags::empty())?;
t.by -= hsz;
t.bx -= hsz;
}
PARTITION_H4 => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.h4[0])?;
let node = &branch.node;
decode_b(c, t, f, bl, b[0], bp, node.h[0])?;
t.by += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.h4[1])?;
decode_b(c, t, f, bl, b[0], bp, branch.h4)?;
t.by += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.h4[2])?;
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_LEFT_HAS_BOTTOM)?;
t.by += hsz >> 1;
if t.by < f.bh {
decode_b(c, t, f, bl, b[0], bp, branch.h4[3])?;
decode_b(c, t, f, bl, b[0], bp, node.h[1])?;
}
t.by -= hsz * 3 >> 1;
}
PARTITION_V4 => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.v4[0])?;
let node = &branch.node;
decode_b(c, t, f, bl, b[0], bp, node.v[0])?;
t.bx += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.v4[1])?;
decode_b(c, t, f, bl, b[0], bp, branch.v4)?;
t.bx += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.v4[2])?;
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TOP_HAS_RIGHT)?;
t.bx += hsz >> 1;
if t.bx < f.bw {
decode_b(c, t, f, bl, b[0], bp, branch.v4[3])?;
decode_b(c, t, f, bl, b[0], bp, node.v[1])?;
}
t.bx -= hsz * 3 >> 1;
}
Expand Down
69 changes: 20 additions & 49 deletions src/intra_edge.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,66 +44,37 @@ static void init_edges(EdgeNode *const node,
const enum EdgeFlags edge_flags)
{
node->o = edge_flags;
node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
node->v[0] = edge_flags | EDGE_ALL_TOP_HAS_RIGHT;

#define ALL_FL(t) (EDGE_I444_##t | EDGE_I422_##t | EDGE_I420_##t)
if (bl == BL_8X8) {
EdgeTip *const nt = (EdgeTip *) node;

node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
node->h[1] = edge_flags & (ALL_FL(LEFT_HAS_BOTTOM) |
node->h[1] = edge_flags & (EDGE_ALL_LEFT_HAS_BOTTOM |
EDGE_I420_TOP_HAS_RIGHT);

node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
node->v[1] = edge_flags & (ALL_FL(TOP_HAS_RIGHT) |
node->v[1] = edge_flags & (EDGE_ALL_TOP_HAS_RIGHT |
EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);

nt->split[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nt->split[1] = (edge_flags & ALL_FL(TOP_HAS_RIGHT)) |
nt->split[0] = (edge_flags & EDGE_ALL_TOP_HAS_RIGHT) |
EDGE_I422_LEFT_HAS_BOTTOM;
nt->split[2] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
nt->split[3] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
nt->split[1] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
nt->split[2] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);
} else {
EdgeBranch *const nwc = (EdgeBranch *) node;

node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
node->h[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);

node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
node->v[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);

nwc->h4[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->h4[1] =
nwc->h4[2] = ALL_FL(LEFT_HAS_BOTTOM);
nwc->h4[3] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
if (bl == BL_16X16)
nwc->h4[1] |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;

nwc->v4[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->v4[1] =
nwc->v4[2] = ALL_FL(TOP_HAS_RIGHT);
nwc->v4[3] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
if (bl == BL_16X16)
nwc->v4[1] |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);

nwc->tls[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tls[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
nwc->tls[2] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
node->h[1] = edge_flags & EDGE_ALL_LEFT_HAS_BOTTOM;
node->v[1] = edge_flags & EDGE_ALL_TOP_HAS_RIGHT;

nwc->trs[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->trs[1] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->trs[2] = 0;

nwc->tts[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tts[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
nwc->tts[2] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);

nwc->tbs[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tbs[1] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->tbs[2] = 0;
nwc->h4 = EDGE_ALL_LEFT_HAS_BOTTOM;
nwc->v4 = EDGE_ALL_TOP_HAS_RIGHT;
if (bl == BL_16X16) {
nwc->h4 |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
nwc->v4 |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);
}
}
}

Expand All @@ -114,17 +85,17 @@ static void init_mode_node(EdgeBranch *const nwc,
const int left_has_bottom)
{
init_edges(&nwc->node, bl,
(top_has_right ? ALL_FL(TOP_HAS_RIGHT) : 0) |
(left_has_bottom ? ALL_FL(LEFT_HAS_BOTTOM) : 0));
(top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
(left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
if (bl == BL_16X16) {
for (int n = 0; n < 4; n++) {
EdgeTip *const nt = mem->nt++;
nwc->split[n] = &nt->node;
init_edges(&nt->node, bl + 1,
((n == 3 || (n == 1 && !top_has_right)) ? 0 :
ALL_FL(TOP_HAS_RIGHT)) |
EDGE_ALL_TOP_HAS_RIGHT) |
(!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
ALL_FL(LEFT_HAS_BOTTOM)));
EDGE_ALL_LEFT_HAS_BOTTOM));
}
} else {
for (int n = 0; n < 4; n++) {
Expand Down
20 changes: 14 additions & 6 deletions src/intra_edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,20 @@
#include <stdint.h>

/*
 * Bit flags passed as the last argument of decode_b() in the hunks above.
 * Each base flag occupies its own bit (1 << 0 .. 1 << 5): three
 * *_TOP_HAS_RIGHT bits and three *_LEFT_HAS_BOTTOM bits, one per
 * I444 / I422 / I420 variant (presumably the chroma pixel layouts --
 * confirm against the rest of the project).
 *
 * NOTE(review): this is a diff rendering without +/- markers. The first
 * three enumerators appear twice: once as the removed lines and once as the
 * re-added (re-aligned) lines. Only one copy exists in the real header.
 */
enum EdgeFlags {
EDGE_I444_TOP_HAS_RIGHT = 1 << 0,
EDGE_I422_TOP_HAS_RIGHT = 1 << 1,
EDGE_I420_TOP_HAS_RIGHT = 1 << 2,
EDGE_I444_TOP_HAS_RIGHT = 1 << 0,
EDGE_I422_TOP_HAS_RIGHT = 1 << 1,
EDGE_I420_TOP_HAS_RIGHT = 1 << 2,
EDGE_I444_LEFT_HAS_BOTTOM = 1 << 3,
EDGE_I422_LEFT_HAS_BOTTOM = 1 << 4,
EDGE_I420_LEFT_HAS_BOTTOM = 1 << 5,
/* Added by this commit: union of the three TOP_HAS_RIGHT bits; replaces
 * the former local ALL_FL(TOP_HAS_RIGHT) macro use in intra_edge.c. */
EDGE_ALL_TOP_HAS_RIGHT = EDGE_I444_TOP_HAS_RIGHT |
EDGE_I422_TOP_HAS_RIGHT |
EDGE_I420_TOP_HAS_RIGHT,
/* Added by this commit: union of the three LEFT_HAS_BOTTOM bits; replaces
 * the former ALL_FL(LEFT_HAS_BOTTOM). */
EDGE_ALL_LEFT_HAS_BOTTOM = EDGE_I444_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM |
EDGE_I420_LEFT_HAS_BOTTOM,
/* Added by this commit: all six bits set. decode_sb() now passes this
 * constant directly where it used to read split[0], tts[0] and tls[0]. */
EDGE_ALL_TR_AND_BL = EDGE_ALL_TOP_HAS_RIGHT |
EDGE_ALL_LEFT_HAS_BOTTOM,
};

typedef struct EdgeNode {
Expand All @@ -45,13 +53,13 @@ typedef struct EdgeNode {

/*
 * Edge-flag node used at block level BL_8X8: decode_sb() casts the node to
 * EdgeTip there, and init_edges() fills it when bl == BL_8X8.
 *
 * NOTE(review): diff rendering without +/- markers. `split[4]` is the
 * removed declaration and `split[3]` is its replacement; the real struct
 * has only one `split` member.
 */
typedef struct EdgeTip {
EdgeNode node;
uint8_t /* enum EdgeFlags */ split[4];
/* After the change only three entries are stored: the flags for the first
 * 4x4 sub-block are always EDGE_ALL_TR_AND_BL and are passed as a constant
 * by decode_sb(), so split[0..2] now hold the flags for the second, third
 * and fourth 4x4 sub-blocks. */
uint8_t /* enum EdgeFlags */ split[3];
} EdgeTip;

/*
 * Edge-flag node used for block levels above BL_8X8 (init_edges() casts the
 * node to EdgeBranch in its non-BL_8X8 branch).
 *
 * NOTE(review): diff rendering without +/- markers. The `tts..v4[4]` line
 * and the non-const `split[4]` line are the removed declarations; the
 * `h4, v4` line and the const `split[4]` line are their replacements.
 */
typedef struct EdgeBranch {
EdgeNode node;
/* Removed: per-partition tables. decode_sb() now derives these values from
 * node.h[] / node.v[] or from constants (EDGE_ALL_TR_AND_BL, 0,
 * EDGE_ALL_LEFT_HAS_BOTTOM, EDGE_ALL_TOP_HAS_RIGHT). */
uint8_t /* enum EdgeFlags */ tts[3], tbs[3], tls[3], trs[3], h4[4], v4[4];
EdgeNode *split[4];
/* Kept: one flag byte each for the second block of PARTITION_H4 and
 * PARTITION_V4. init_edges() sets them to EDGE_ALL_LEFT_HAS_BOTTOM /
 * EDGE_ALL_TOP_HAS_RIGHT and ORs in extra bits from edge_flags when
 * bl == BL_16X16. */
uint8_t /* enum EdgeFlags */ h4, v4;
/* The four child nodes; init_mode_node() stores &nt->node here for
 * BL_16X16. Now const-qualified. */
const EdgeNode *split[4];
} EdgeBranch;

void dav1d_init_mode_tree(EdgeNode *const root, EdgeTip *const nt,
Expand Down
Loading

0 comments on commit 57d50de

Please sign in to comment.