[ibc] Implement CRC for 8x8 block and generate a full hashmap at the frame load

This commit is contained in:
Marko Viitanen 2023-06-26 21:24:10 +03:00
parent 4b1f5ca7e2
commit 76d66591c5
7 changed files with 103 additions and 14 deletions

View file

@ -45,17 +45,20 @@
#include "encode_coding_tree.h"
#include "encoder_state-bitstream.h"
#include "filter.h"
#include "hashmap.h"
#include "image.h"
#include "rate_control.h"
#include "sao.h"
#include "search.h"
#include "tables.h"
#include "threads.h"
#include "threadqueue.h"
#include "alf.h"
#include "reshape.h"
#include "strategies/strategies-picture.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
@ -1936,6 +1939,36 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
assert(0);
}
if (state->encoder_control->cfg.ibc != 0) {
  // Build the intra block copy (IBC) hashmap for the frame that was just
  // loaded: every block position that lies completely inside the luma plane
  // is hashed with CRC32C and stored as crc -> packed (x, y) position.
  const int frame_width  = state->tile->frame->width;
  const int frame_height = state->tile->frame->height;
  // uvg_crc32c_8x8 reads 8 rows of 8 pixels starting at the given pointer.
  const int hash_block_size = 8;
  int items = 0;

  UVG_CLOCK_T hashmap_start_real_time;
  UVG_CLOCK_T hashmap_end_real_time;
  UVG_GET_TIME(&hashmap_start_real_time);

  // Create a new hashmap with UVG_HASHMAP_RATIO buckets per
  // UVG_HASHMAP_BLOCKSIZE x UVG_HASHMAP_BLOCKSIZE block
  state->tile->frame->ibc_hashmap = uvg_hashmap_create(
    (int)(((float)(frame_width * frame_height) /
           (float)(UVG_HASHMAP_BLOCKSIZE * UVG_HASHMAP_BLOCKSIZE)) * UVG_HASHMAP_RATIO));

  // Fill the hashmap with the current frame's block information.
  // The loops stop at the last position where the whole block still fits in
  // the frame; hashing further right/down would read past the end of the row
  // (wrapping into the next one) or past the end of the luma plane.
  // NOTE(review): the frame width is used as the row stride of source->y, as
  // in the original code; confirm the source picture has no row padding.
  for (int y = 0; y + hash_block_size <= frame_height; y += 1) {
    for (int x = 0; x + hash_block_size <= frame_width; x += 1) {
      uint32_t crc = uvg_crc32c_8x8(state->tile->frame->source->y + y * frame_width + x, frame_width);
      // Pack the position as x in the high and y in the low 16 bits. The
      // arithmetic is done unsigned so that shifting x never overflows int.
      uvg_hashmap_insert(state->tile->frame->ibc_hashmap, crc,
                         (((uint32_t)x & 0xffff) << 16) | ((uint32_t)y & 0xffff));
      items++;
    }
  }

  UVG_GET_TIME(&hashmap_end_real_time);
  double wall_time = UVG_CLOCK_T_AS_DOUBLE(hashmap_end_real_time) -
                     UVG_CLOCK_T_AS_DOUBLE(hashmap_start_real_time);
  fprintf(stderr, "Hashmap creation time: %f, items: %d, size %d\n", wall_time, items, state->tile->frame->ibc_hashmap->bucket_size);
}
if (state->encoder_control->cfg.lmcs_enable) {
uvg_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth);

View file

@ -72,9 +72,11 @@ uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size)
* \param bucket_size the size of the hashmap bucket
* \return the hashed index for the given key and bucket size.
*/
uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size) {
static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size)
{
  // First mixing round: fold the upper bits of the key into the lower ones.
  const uint32_t folded = key ^ ((key >> 20) ^ (key >> 12));
  // Second round plus a constant XOR, so that a zero key does not hash to 0.
  const uint32_t mixed = folded ^ (folded >> 7) ^ (folded >> 4) ^ 2654435769U;
  // Reduce the mixed value to a bucket index in [0, bucket_size).
  return mixed % bucket_size;
}
/**
@ -99,21 +101,22 @@ void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) {
* \return uvg_hashmap_node the node with the given key, NULL if not found.
*/
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key) {
  // Hand back the head of the chain stored in the bucket the key hashes to.
  // The chain is not filtered here: it is empty (NULL) when nothing was
  // inserted into the bucket, and it may hold nodes with other keys, so the
  // caller has to compare node->key while walking it.
  return map->table[uvg_hashmap_hash(key, map->bucket_size)];
}
/**
 * \brief Look up a key and return the value of the first matching node.
 *
 * Walks the chain of the bucket the key hashes to and stops at the first
 * node whose key is equal to the requested one. Nothing is allocated and
 * the map is not modified.
 *
 * \param map  the hashmap to search
 * \param key  the key to look for
 * \return the value stored in the first node with the given key, or
 *         (uint32_t)-1 (0xFFFFFFFF) if the key is not in the map.
 */
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key)
{
  uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size);
  uvg_hashmap_node_t* temp = map->table[hashIndex];

  // Search key in chain and return the first match
  while (temp) {
    if (temp->key == key) {
      return temp->value;
    }
    temp = (uvg_hashmap_node_t*)temp->next;
  }

  // Not found. The sentinel is spelled out as an unsigned value; it is the
  // same bit pattern a plain "return -1" converts to.
  return (uint32_t)-1;
}
/**

View file

@ -37,9 +37,9 @@
#include <stdint.h>
// The ratio of the hashmap bucket size to the maximum number of elements
#define UVG_HASHMAP_RATIO 0.35
#define UVG_HASHMAP_RATIO 6.0
// Side length (in pixels) of the square blocks hashed into the hashmap
#define UVG_HASHMAP_BLOCKSIZE 4
#define UVG_HASHMAP_BLOCKSIZE 8
typedef struct uvg_hashmap_node {
uint32_t key;
@ -56,12 +56,14 @@ uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value);
uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size);
uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size);
//uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size);
void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value);
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key);
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key);
void uvg_hashmap_node_free(uvg_hashmap_node_t* node);
void uvg_hashmap_free(uvg_hashmap_t* map);

View file

@ -802,6 +802,20 @@ INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf)
return crc;
}
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
{
  // Feed 8 consecutive pixels into the running CRC, one table lookup each.
  // Only the low 8 bits of (crc ^ pixel) select the table entry.
  for (int i = 0; i < 8; i++) {
    const uint32_t table_index = (crc ^ buf[i]) & 0xFF;
    crc = uvg_crc_table[table_index] ^ (crc >> 8);
  }
  return crc;
}
static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
uint32_t crc = 0xFFFFFFFF;
@ -829,11 +843,29 @@ static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_
return crc ^ 0xFFFFFFFF;
}
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
  // Start from the all-ones initial value and run the 8 rows of the block
  // through the CRC from top to bottom, 8 pixels per row.
  uint32_t checksum = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 8; row++) {
    checksum = uvg_crc32c_8_generic(checksum, &buf[row * pic_stride]);
  }
  // Final inversion of all bits.
  return ~checksum;
}
int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
{
bool success = true;
if (bitdepth == 8) {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_8bit_generic);
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
} else {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
}

View file

@ -64,6 +64,20 @@ static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_st
return crc ^ 0xFFFFFFFF;
}
/**
 * \brief CRC32C of an 8x8 block of 8-bit pixels using the SSE4.2 crc32
 *        instruction.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between the starts of two rows, in pixels
 * \return the CRC over all 64 pixels of the block, rows taken top to bottom
 */
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint64_t crc = 0xFFFFFFFF;

  // Each row holds 8 one-byte pixels, so a full row has to be loaded as one
  // 64-bit word. Loading it through a 32-bit pointer would feed only the left
  // four pixels (zero-extended) into the CRC and ignore the right half of the
  // block.
  // NOTE(review): the load goes through a casted pointer, following the
  // original code; the row start is not guaranteed to be 8-byte aligned.
  for (uint32_t row = 0; row < 8; row++) {
    crc = _mm_crc32_u64(crc, *((const uint64_t *)&buf[row * pic_stride]));
  }

  // The instruction leaves the CRC in the low 32 bits of the result.
  return (uint32_t)(crc ^ 0xFFFFFFFF);
}
#endif //COMPILE_INTEL_SSE42
@ -72,6 +86,7 @@ int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) {
#if COMPILE_INTEL_SSE42
if (bitdepth == 8){
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_8bit_sse42);
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
} else {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
}

View file

@ -42,6 +42,7 @@
// Define function pointers.
crc32c_4x4_func * uvg_crc32c_4x4 = 0;
crc32c_8x8_func * uvg_crc32c_8x8 = 0;
reg_sad_func * uvg_reg_sad = 0;
cost_pixel_nxn_func * uvg_sad_4x4 = 0;

View file

@ -155,9 +155,11 @@ typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel*
extern const uint32_t uvg_crc_table[256];
typedef uint32_t(crc32c_4x4_func)(const uvg_pixel *buf, uint32_t pic_stride);
typedef uint32_t(crc32c_8x8_func)(const uvg_pixel *buf, uint32_t pic_stride);
// Declare function pointers.
extern crc32c_4x4_func * uvg_crc32c_4x4;
extern crc32c_8x8_func * uvg_crc32c_8x8;
extern reg_sad_func * uvg_reg_sad;
@ -206,6 +208,7 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
#define STRATEGIES_PICTURE_EXPORTS \
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
{"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \
{"reg_sad", (void**) &uvg_reg_sad}, \
{"sad_4x4", (void**) &uvg_sad_4x4}, \
{"sad_8x8", (void**) &uvg_sad_8x8}, \