Copy pixels more efficiently in lcu recon.

2024-11-27 19:24:06 +00:00 · 2016-03-18 20:10:03 +02:00 · 2016-03-18 20:10:03 +02:00 · f4538ab474
parent 50923238cc
commit f4538ab474
1 changed files with 14 additions and 2 deletions
--- a/src/inter.c
+++ b/src/inter.c
@ -373,14 +373,24 @@ void kvz_inter_recon_lcu(const encoder_state_t * const state,
        }
      }
    } else { //If no overflow, we can copy without checking boundaries
+      
+      #if LCU_WIDTH == 64
+        #define CHUNK int64_t
+      #else
+        #define CHUNK kvz_pixel
+      #endif
+
      // Copy Luma
      for (y = ypos; y < ypos + height; y++) {
        int y_in_lcu = (y & ((LCU_WIDTH)-1));
        coord_y = ((y + state->tile->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate
-        for (x = xpos; x < xpos + width; x++) {
+        for (x = xpos; x < xpos + width; x+=sizeof(CHUNK)/sizeof(kvz_pixel)) {
          int x_in_lcu = (x & ((LCU_WIDTH)-1));
+          kvz_pixel *dst = &(lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu]);
+          kvz_pixel *src = &(ref->y[coord_y + (x + state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]]);

-          lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y[coord_y + (x + state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]];
+          //Copy one or many pixels simultaneously
+          *(CHUNK*)dst = *(CHUNK*)src;
        }
      }

@ -397,6 +407,8 @@ void kvz_inter_recon_lcu(const encoder_state_t * const state,
          }
        }
      }
+
+      #undef CHUNK
    }
  }
 }