diff --git a/src/encoderstate.c b/src/encoderstate.c
index 89ba6a10..91ea23d3 100644
--- a/src/encoderstate.c
+++ b/src/encoderstate.c
@@ -259,11 +259,18 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
   }
   
   if (encoder->sao_enable && lcu->above) {
-    //If we're not the first in the row
+    // Add the post-deblocking but pre-SAO pixels of the LCU row above this
+    // row to a buffer so this row can use them on it's own SAO
+    // reconstruction.
+
+    // The pixels need to be taken to from the LCU to the top-left, because
+    // not all of the pixels could be deblocked before prediction of this
+    // LCU was reconstructed.
     if (lcu->above->left) {
       encoder_state_recdata_to_bufs(state, lcu->above->left, state->tile->hor_buf_before_sao, NULL);
     }
-    //Latest LCU in the row, copy the data from the one above also
+    // If this is the last LCU in the row, we can save the pixels from the top
+    // also, as they have been fully deblocked.
     if (!lcu->right) {
       encoder_state_recdata_to_bufs(state, lcu->above, state->tile->hor_buf_before_sao, NULL);
     }
@@ -558,13 +565,17 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
         }
       }
       
-      //If children are wavefront, we need to reconstruct SAO
-      if (main_state->encoder_control->sao_enable && main_state->children[0].type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
+      // Add SAO reconstruction jobs and their dependancies when using WPP coding.
+      if (main_state->encoder_control->sao_enable && 
+          main_state->children[0].type == ENCODER_STATE_TYPE_WAVEFRONT_ROW)
+      {
         int y;
         videoframe_t * const frame = main_state->tile->frame;
         threadqueue_job_t *previous_job = NULL;
         
         for (y = 0; y < frame->height_in_lcu; ++y) {
+          // Queue a single job performing SAO reconstruction for the whole wavefront row.
+
           worker_sao_reconstruct_lcu_data *data = MALLOC(worker_sao_reconstruct_lcu_data, 1);
           threadqueue_job_t *job;
 #ifdef KVZ_DEBUG
@@ -578,24 +589,31 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
           
           job = kvz_threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_sao_reconstruct_lcu, data, 1, job_description);
           
+          // This dependancy is needed, because the pre-SAO pixels from the LCU row
+          // below this one are read straigh from the frame.
           if (previous_job) {
             kvz_threadqueue_job_dep_add(job, previous_job);
           }
           previous_job = job;
           
+          // This depepndancy ensures that the bottom edge of this LCU row
+          // has been fully deblocked.
           if (y < frame->height_in_lcu - 1) {
-            //Not last row: depend on the last LCU of the row below
+            // Not last row: depend on the last LCU of the row below.
             kvz_threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 1) * frame->width_in_lcu + frame->width_in_lcu - 1]);
           } else {
-            //Last row: depend on the last LCU of the row
+            // Last row: depend on the last LCU of the row
             kvz_threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 0) * frame->width_in_lcu + frame->width_in_lcu - 1]);
           }
           kvz_threadqueue_job_unwait_job(main_state->encoder_control->threadqueue, job);
           
-          //Set wfrow recon job
+          // The wavefront row is finished, when the SAO-reconstruction is
+          // finished.
           main_state->children[y].tqj_recon_done = job;
           
           if (y == frame->height_in_lcu - 1) {
+            // This tile is finished, when the reconstruction of the last
+            // WPP-row is finished.
             assert(!main_state->tqj_recon_done);
             main_state->tqj_recon_done = job;
           }
diff --git a/src/encoderstate.h b/src/encoderstate.h
index 8a4c4796..c4de42ad 100644
--- a/src/encoderstate.h
+++ b/src/encoderstate.h
@@ -116,8 +116,8 @@ typedef struct {
   //order by column of (LCU_WIDTH * encoder_state->height_in_lcu) pixels (there is no more extra pixel, since we can use a negative index)
   yuv_t *ver_buf_search;
   
+  // The bottom post-deblocking, pre-SAO pixels of every WPP-row.
   yuv_t *hor_buf_before_sao;
-  yuv_t *ver_buf_before_sao;
   
   //Jobs for each individual LCU of a wavefront row.
   threadqueue_job_t **wf_jobs;