diff --git a/src/cu.c b/src/cu.c index 5ed7a140..7f9f8e94 100644 --- a/src/cu.c +++ b/src/cu.c @@ -125,6 +125,8 @@ cu_array_t * kvz_cu_array_alloc(const int width_in_scu, const int height_in_scu) cu_array_t *cua; cua = MALLOC(cu_array_t, 1); cua->data = (cu_info_t*)malloc(sizeof(cu_info_t) * cu_array_size); + cua->width = width_in_scu << 3; + cua->height = height_in_scu << 3; cua->refcount = 1; FILL_ARRAY(cua->data, 0, cu_array_size); return cua; @@ -145,3 +147,37 @@ int kvz_cu_array_free(cu_array_t * const cua) return 1; } + +/** + * \brief Copy a rectangular part of a cu array to another cu array. + * + * All values are in luma pixels. Coordinates and sizes are converted to array indices with >> 3, so they are rounded down to multiples of 8. + * No bounds checking is done, and rows are copied with memcpy, so the source and destination areas must not overlap. + * \param dst destination array + * \param dst_x x-coordinate of the left edge of the copied area in dst + * \param dst_y y-coordinate of the top edge of the copied area in dst + * \param src source array + * \param src_x x-coordinate of the left edge of the copied area in src + * \param src_y y-coordinate of the top edge of the copied area in src + * \param width width of the area to copy + * \param height height of the area to copy + */ +void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, + const cu_array_t* src, int src_x, int src_y, + int width, int height) +{ + // Convert values from pixel coordinates to array indices (pixels >> 3); each array has its own row stride. + int src_stride = src->width >> 3; + int dst_stride = dst->width >> 3; + const cu_info_t* src_ptr = &src->data[(src_x >> 3) + (src_y >> 3) * src_stride]; + cu_info_t* dst_ptr = &dst->data[(dst_x >> 3) + (dst_y >> 3) * dst_stride]; + + // Number of bytes to copy per row of the copied area. 
+ const size_t row_size = sizeof(cu_info_t) * (width >> 3); + + for (int i = 0; i < (height >> 3); ++i) { + memcpy(dst_ptr, src_ptr, row_size); + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} diff --git a/src/cu.h b/src/cu.h index 1517ef01..36f4a310 100644 --- a/src/cu.h +++ b/src/cu.h @@ -178,12 +178,18 @@ typedef struct (cu).inter.mv_cand, (cu).inter.mv_ref, (cu).inter.mv_dir, (cu).inter.mode) typedef struct { - cu_info_t *data; //!< \brief cu_info data - int32_t refcount; //!< \brief number of references in reflists to this cu_array + cu_info_t *data; //!< \brief array of cu_info_t entries + int32_t width; //!< \brief width of the array in luma pixels + int32_t height; //!< \brief height of the array in luma pixels + int32_t refcount; //!< \brief number of references to this cu_array } cu_array_t; cu_array_t * kvz_cu_array_alloc(int width_in_scu, int height_in_scu); int kvz_cu_array_free(cu_array_t *cua); +void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, + const cu_array_t* src, int src_x, int src_y, + int width, int height); + /** * \brief Return the 7 lowest-order bits of the pixel coordinate. 
diff --git a/src/encoderstate.c b/src/encoderstate.c index 6ca153b1..417109fb 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -207,18 +207,15 @@ static void encoder_state_worker_encode_lcu(void * opaque) { while (main_state->parent) main_state = main_state->parent; assert(main_state != state); - unsigned child_width_in_scu = state->tile->frame->width_in_lcu << MAX_DEPTH; - unsigned main_width_in_scu = main_state->tile->frame->width_in_lcu << MAX_DEPTH; - unsigned tile_x = state->tile->lcu_offset_x << MAX_DEPTH; - unsigned tile_y = state->tile->lcu_offset_y << MAX_DEPTH; - unsigned x = lcu->position.x << MAX_DEPTH; - unsigned y = lcu->position.y << MAX_DEPTH; - - for (unsigned lcu_row = 0; lcu_row < 8; ++lcu_row) { - cu_info_t *main_row = &main_state->tile->frame->cu_array->data[x + tile_x + (y + tile_y + lcu_row) * main_width_in_scu]; - cu_info_t *child_row = &state->tile->frame->cu_array->data[x + (y + lcu_row) * child_width_in_scu]; - memcpy(main_row, child_row, sizeof(cu_info_t) * 8); - } + const unsigned tile_x_px = state->tile->lcu_offset_x << LOG2_LCU_WIDTH; /* tile offset scaled from LCU units by LOG2_LCU_WIDTH */ + const unsigned tile_y_px = state->tile->lcu_offset_y << LOG2_LCU_WIDTH; + const unsigned x_px = lcu->position_px.x; /* LCU position as stored in lcu->position_px */ + const unsigned y_px = lcu->position_px.y; + kvz_cu_array_copy(main_state->tile->frame->cu_array, /* dst: cu_array of the top-most parent state */ + x_px + tile_x_px, y_px + tile_y_px, /* LCU position shifted by the tile offset */ + state->tile->frame->cu_array, /* src: cu_array of this state */ + x_px, y_px, + LCU_WIDTH, LCU_WIDTH); /* copy an LCU_WIDTH x LCU_WIDTH area */ PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=copy_cuinfo,frame=%d,tile=%d", state->global->frame, state->tile->id); }