Skip to content

Commit

Permalink
Merge branch 'inter-satd'
Browse files Browse the repository at this point in the history
  • Loading branch information
aryla committed Jul 25, 2017
2 parents 924cf85 + 19e051e commit afc13f1
Show file tree
Hide file tree
Showing 11 changed files with 266 additions and 148 deletions.
4 changes: 2 additions & 2 deletions src/encode_coding_tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1021,8 +1021,8 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
uint8_t split_model = 0;

// Absolute coordinates
uint16_t abs_x = x + state->tile->lcu_offset_x * LCU_WIDTH;
uint16_t abs_y = y + state->tile->lcu_offset_y * LCU_WIDTH;
uint16_t abs_x = x + state->tile->offset_x;
uint16_t abs_y = y + state->tile->offset_y;

// Check for slice border FIXME
bool border_x = ctrl->in.width < abs_x + (LCU_WIDTH >> depth);
Expand Down
6 changes: 4 additions & 2 deletions src/encoder_state-ctors_dtors.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
printf("Error allocating videoframe!\r\n");
return 0;
}

state->tile->lcu_offset_x = lcu_offset_x;
state->tile->lcu_offset_y = lcu_offset_y;

state->tile->offset_x = lcu_offset_x * LCU_WIDTH;
state->tile->offset_y = lcu_offset_y * LCU_WIDTH;

state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu];

// hor_buf_search and ver_buf_search store single row/col from each LCU row/col.
Expand Down
9 changes: 4 additions & 5 deletions src/encoderstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,12 +636,11 @@ static void encoder_state_worker_encode_lcu(void * opaque)
while (main_state->parent) main_state = main_state->parent;
assert(main_state != state);

const unsigned tile_x_px = state->tile->lcu_offset_x << LOG2_LCU_WIDTH;
const unsigned tile_y_px = state->tile->lcu_offset_y << LOG2_LCU_WIDTH;
const unsigned x_px = lcu->position_px.x;
const unsigned y_px = lcu->position_px.y;
kvz_cu_array_copy(main_state->tile->frame->cu_array,
x_px + tile_x_px, y_px + tile_y_px,
x_px + state->tile->offset_x,
y_px + state->tile->offset_y,
state->tile->frame->cu_array,
x_px, y_px,
LCU_WIDTH, LCU_WIDTH);
Expand Down Expand Up @@ -889,8 +888,8 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
encoder_state_t *sub_state = &(main_state->children[i]);

if (sub_state->tile != main_state->tile) {
const int offset_x = sub_state->tile->lcu_offset_x * LCU_WIDTH;
const int offset_y = sub_state->tile->lcu_offset_y * LCU_WIDTH;
const int offset_x = sub_state->tile->offset_x;
const int offset_y = sub_state->tile->offset_y;
const int width = MIN(sub_state->tile->frame->width_in_lcu * LCU_WIDTH, main_state->tile->frame->width - offset_x);
const int height = MIN(sub_state->tile->frame->height_in_lcu * LCU_WIDTH, main_state->tile->frame->height - offset_y);

Expand Down
8 changes: 6 additions & 2 deletions src/encoderstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,15 @@ typedef struct encoder_state_config_tile_t {
videoframe_t *frame;

int32_t id;

//Tile: offset in LCU for current encoder_state in global coordinates
int32_t lcu_offset_x;
int32_t lcu_offset_y;


//Tile: offset in pixels
int32_t offset_x;
int32_t offset_y;

//Position of the first element in tile scan in global coordinates
int32_t lcu_offset_in_ts;

Expand Down
91 changes: 79 additions & 12 deletions src/image.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <limits.h>
#include <stdlib.h>

#include "strategies/strategies-ipol.h"
#include "strategies/strategies-picture.h"
#include "threads.h"

Expand Down Expand Up @@ -449,21 +450,19 @@ static unsigned image_interpolated_sad(const kvz_picture *pic, const kvz_picture
* \param pic Image for the block we are trying to find.
* \param ref Image where we are trying to find the block.
*
* \returns
* \returns Sum of absolute differences
*/
unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y,
int block_width, int block_height, int max_px_below_lcu) {
unsigned kvz_image_calc_sad(const kvz_picture *pic,
const kvz_picture *ref,
int pic_x,
int pic_y,
int ref_x,
int ref_y,
int block_width,
int block_height)
{
assert(pic_x >= 0 && pic_x <= pic->width - block_width);
assert(pic_y >= 0 && pic_y <= pic->height - block_height);

// Check that we are not referencing pixels that are not final.
if (max_px_below_lcu >= 0) {
int next_lcu_row_px = ((pic_y >> LOG2_LCU_WIDTH) + 1) << LOG2_LCU_WIDTH;
int px_below_lcu = ref_y + block_height - next_lcu_row_px;
if (px_below_lcu > max_px_below_lcu) {
return INT_MAX;
}
}

if (ref_x >= 0 && ref_x <= ref->width - block_width &&
ref_y >= 0 && ref_y <= ref->height - block_height)
Expand All @@ -480,6 +479,74 @@ unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int
}


/**
 * \brief Calculate SATD between a block in pic and a block in ref.
 *
 * The block in pic must lie completely inside the picture (asserted). The
 * block in ref is allowed to reach outside the reference frame; in that case
 * the missing pixels are extrapolated before the cost is calculated.
 *
 * \param pic           Image for the block we are trying to find.
 * \param ref           Image where we are trying to find the block.
 * \param pic_x         Horizontal position of the block in pic.
 * \param pic_y         Vertical position of the block in pic.
 * \param ref_x         Horizontal position of the block in ref.
 * \param ref_y         Vertical position of the block in ref.
 * \param block_width   Width of the block in pixels.
 * \param block_height  Height of the block in pixels.
 *
 * \returns Value of kvz_satd_any_size for the two luma blocks, shifted right
 *          by (KVZ_BIT_DEPTH - 8) bits.
 */
unsigned kvz_image_calc_satd(const kvz_picture *pic,
                             const kvz_picture *ref,
                             int pic_x,
                             int pic_y,
                             int ref_x,
                             int ref_y,
                             int block_width,
                             int block_height)
{
  assert(pic_x >= 0 && pic_x <= pic->width - block_width);
  assert(pic_y >= 0 && pic_y <= pic->height - block_height);

  // Top-left luma pixel of the block being searched for; used by both paths.
  const kvz_pixel *const cur_block = &pic->y[pic_y * pic->stride + pic_x];

  const int ref_fits_horizontally =
    ref_x >= 0 && ref_x <= ref->width - block_width;
  const int ref_fits_vertically =
    ref_y >= 0 && ref_y <= ref->height - block_height;

  if (ref_fits_horizontally && ref_fits_vertically) {
    // Reference block is completely inside the frame, so the SATD can be
    // calculated straight from the reference picture. This is the most
    // common case, which is why it is handled first.
    const kvz_pixel *const ref_block = &ref->y[ref_y * ref->stride + ref_x];
    return kvz_satd_any_size(block_width,
                             block_height,
                             cur_block,
                             pic->stride,
                             ref_block,
                             ref->stride) >> (KVZ_BIT_DEPTH - 8);
  }

  // Reference block reaches outside the frame: extrapolate the missing
  // pixels into a temporary block. The reference position is passed as an
  // offset relative to the block position in pic.
  kvz_extended_block extended;
  kvz_get_extended_block(pic_x,
                         pic_y,
                         ref_x - pic_x,
                         ref_y - pic_y,
                         0,
                         0,
                         ref->y,
                         ref->width,
                         ref->height,
                         0,
                         block_width,
                         block_height,
                         &extended);

  const unsigned cost = kvz_satd_any_size(block_width,
                                          block_height,
                                          cur_block,
                                          pic->stride,
                                          extended.buffer,
                                          extended.stride) >> (KVZ_BIT_DEPTH - 8);

  // The extended block may own a heap buffer; release it once the cost has
  // been computed.
  if (extended.malloc_used) {
    FREE_POINTER(extended.buffer);
  }

  return cost;
}




/**
* \brief BLock Image Transfer from one buffer to another.
*
Expand Down
20 changes: 18 additions & 2 deletions src/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,24 @@ void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv);


//Algorithms
unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y,
int block_width, int block_height, int max_lcu_below);
unsigned kvz_image_calc_sad(const kvz_picture *pic,
const kvz_picture *ref,
int pic_x,
int pic_y,
int ref_x,
int ref_y,
int block_width,
int block_height);


/**
 * \brief Calculate SATD between a block in pic and a block in ref.
 *
 * The block at (pic_x, pic_y) must be completely inside pic; the definition
 * asserts this. The block at (ref_x, ref_y) may extend outside ref, in which
 * case the pixels outside the frame are extrapolated.
 *
 * \param pic           Image for the block we are trying to find.
 * \param ref           Image where we are trying to find the block.
 * \param pic_x         Horizontal position of the block in pic.
 * \param pic_y         Vertical position of the block in pic.
 * \param ref_x         Horizontal position of the block in ref.
 * \param ref_y         Vertical position of the block in ref.
 * \param block_width   Width of the block in pixels.
 * \param block_height  Height of the block in pixels.
 *
 * \returns SATD of the two luma blocks, shifted right by (KVZ_BIT_DEPTH - 8).
 */
unsigned kvz_image_calc_satd(const kvz_picture *pic,
const kvz_picture *ref,
int pic_x,
int pic_y,
int ref_x,
int ref_y,
int block_width,
int block_height);


void kvz_pixels_blit(const kvz_pixel* orig, kvz_pixel *dst,
Expand Down
52 changes: 34 additions & 18 deletions src/inter.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ static void inter_recon_frac_luma(const encoder_state_t * const state,
ypos,
mv_param[0] >> 2,
mv_param[1] >> 2,
state->tile->lcu_offset_x * LCU_WIDTH,
state->tile->lcu_offset_y * LCU_WIDTH,
state->tile->offset_x,
state->tile->offset_y,
ref->y,
ref->width,
ref->height,
Expand Down Expand Up @@ -106,8 +106,8 @@ static void inter_recon_14bit_frac_luma(const encoder_state_t * const state,
ypos,
mv_param[0] >> 2,
mv_param[1] >> 2,
state->tile->lcu_offset_x * LCU_WIDTH,
state->tile->lcu_offset_y * LCU_WIDTH,
state->tile->offset_x,
state->tile->offset_y,
ref->y,
ref->width,
ref->height,
Expand Down Expand Up @@ -154,14 +154,34 @@ static void inter_recon_frac_chroma(const encoder_state_t * const state,
kvz_extended_block src_v = { 0, 0, 0, 0 };

//Fractional chroma U
kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_u);
kvz_get_extended_block(xpos, ypos,
(mv_param[0] >> 2) >> 1,
(mv_param[1] >> 2) >> 1,
state->tile->offset_x >> 1,
state->tile->offset_y >> 1,
ref->u,
ref->width >> 1,
ref->height >> 1,
FILTER_SIZE_C,
block_width,
block_height,
&src_u);
kvz_sample_octpel_chroma(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width,
block_height, lcu->rec.u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);

//Fractional chroma V
kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_v);
kvz_get_extended_block(xpos, ypos,
(mv_param[0] >> 2) >> 1,
(mv_param[1] >> 2) >> 1,
state->tile->offset_x >> 1,
state->tile->offset_y >> 1,
ref->v,
ref->width >> 1,
ref->height >> 1,
FILTER_SIZE_C,
block_width,
block_height,
&src_v);
kvz_sample_octpel_chroma(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width,
block_height, lcu->rec.v + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);

Expand Down Expand Up @@ -198,8 +218,8 @@ static void inter_recon_14bit_frac_chroma(const encoder_state_t * const state,
ypos,
(mv_param[0] >> 2) >> 1,
(mv_param[1] >> 2) >> 1,
state->tile->lcu_offset_x * LCU_WIDTH_C,
state->tile->lcu_offset_y * LCU_WIDTH_C,
state->tile->offset_x >> 1,
state->tile->offset_y >> 1,
ref->u,
ref->width >> 1,
ref->height >> 1,
Expand All @@ -223,8 +243,8 @@ static void inter_recon_14bit_frac_chroma(const encoder_state_t * const state,
ypos,
(mv_param[0] >> 2) >> 1,
(mv_param[1] >> 2) >> 1,
state->tile->lcu_offset_x * LCU_WIDTH_C,
state->tile->lcu_offset_y * LCU_WIDTH_C,
state->tile->offset_x >> 1,
state->tile->offset_y >> 1,
ref->v,
ref->width >> 1,
ref->height >> 1,
Expand Down Expand Up @@ -308,17 +328,13 @@ void kvz_inter_recon_lcu(const encoder_state_t * const state,
lcu_t *lcu,
hi_prec_buf_t *hi_prec_out)
{
const vector2d_t tile_in_frame = {
state->tile->lcu_offset_x * LCU_WIDTH,
state->tile->lcu_offset_y * LCU_WIDTH
};
const vector2d_t pu_in_tile = { xpos, ypos };
const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH };

const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 };
const vector2d_t mv_in_frame = {
mv_in_pu.x + pu_in_tile.x + tile_in_frame.x,
mv_in_pu.y + pu_in_tile.y + tile_in_frame.y
mv_in_pu.x + pu_in_tile.x + state->tile->offset_x,
mv_in_pu.y + pu_in_tile.y + state->tile->offset_y
};

const bool mv_is_outside_frame = mv_in_frame.x < 0 ||
Expand Down
19 changes: 8 additions & 11 deletions src/search.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,8 @@
&& (x) + (block_width) <= (width) \
&& (y) + (block_height) <= (height))

// Cost treshold for doing intra search in inter frames with --rd=0.
#ifndef INTRA_TRESHOLD
# define INTRA_TRESHOLD 20
#endif

// Cost threshold for doing intra search in inter frames with --rd=0.
static const int INTRA_THRESHOLD = 8;

// Modify weight of luma SSD.
#ifndef LUMA_MULT
Expand Down Expand Up @@ -484,7 +481,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// decision after reconstructing the inter frame.
bool skip_intra = state->encoder_control->cfg.rdo == 0
&& cur_cu->type != CU_NOTSET
&& cost / (cu_width * cu_width) < INTRA_TRESHOLD;
&& cost / (cu_width * cu_width) < INTRA_THRESHOLD;

int32_t cu_width_intra_min = LCU_WIDTH >> ctrl->cfg.pu_depth_intra.max;
bool can_use_intra =
Expand Down Expand Up @@ -719,11 +716,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}

PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->frame->num, state->tile->id, state->slice->id,
(state->tile->lcu_offset_x * LCU_WIDTH) + x,
(state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth),
(state->tile->lcu_offset_y * LCU_WIDTH) + y,
(state->tile->lcu_offset_y * LCU_WIDTH) + y + (LCU_WIDTH >> depth),
depth, debug_split, (cur_cu->type==CU_INTRA)?1:0);
state->tile->offset_x + x,
state->tile->offset_x + x + cu_width,
state->tile->offset_y + y,
state->tile->offset_y + y + cu_width,
depth, debug_split, (cur_cu->type == CU_INTRA) ? 1 : 0);

assert(cur_cu->type != CU_NOTSET);

Expand Down
Loading

0 comments on commit afc13f1

Please sign in to comment.