Add performance measurements to bitstream writing and frame encoding, and
plot thread usage in the threadqueue log plotter.

src/encoder.c:
  * encoder_state_write_bitstream_main: the loop that appends the child
    bitstreams and the add_checksum() call are each wrapped in a
    PERFORMANCE_MEASURE_START/PERFORMANCE_MEASURE_END pair.
  * encode_one_frame: encoder_state_new_frame() and encoder_state_encode()
    are each wrapped in a measurement pair.

tools/plot-threadqueue-log.py:
  * New class IntervalThreadCounter that counts overlapping intervals.
  * The LogParser plot draws the exact count of running thread jobs and a
    smoothed version of it (red).

Review changes folded into this version of the patch:
  * The measurement descriptions used the key "type" twice
    ("type=write_bitstream_...,frame=%d,type=%c"); the state type is now
    logged as "encoder_type" so every key in a description is unique.
  * IntervalThreadCounter.add_interval no longer re-sorts both lists on every
    insertion; get_values_xd() already sorts the merged events.
  * IntervalThreadCounter.get_values_uniform_xy returns two empty lists when
    no interval has been added instead of failing on xchs[0].
  * Replaced a comment on get_values_xd that did not describe the method.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and whitespace-only context lines were restored by hand to match
the original hunk sizes; run "git apply --check" before applying.

diff --git a/src/encoder.c b/src/encoder.c
index 987fe398..886e747e 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -1685,7 +1685,6 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state)
 
   int i;
 
-
   if (main_state->global->is_radl_frame) {
     // Access Unit Delimiter (AUD)
     if (encoder->aud_enable)
@@ -1726,17 +1725,26 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state)
     nal_write(stream,
               main_state->global->is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code);
   }
-
+  {
+    // Measure appending the child bitstreams to the main stream. The state type
+    // is logged as "encoder_type" so the description has no duplicate "type" key.
+    PERFORMANCE_MEASURE_START();
   for (i = 0; main_state->children[i].encoder_control; ++i) {
     //Append bitstream to main stream
     bitstream_append(&main_state->stream, &main_state->children[i].stream);
     //FIXME: Move this...
     bitstream_clear(&main_state->children[i].stream);
   }
+    PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
+  }
+
+  {
+    PERFORMANCE_MEASURE_START();
+    // Calculate checksum
+    add_checksum(main_state);
+    PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
+  }
 
-  // Calculate checksum
-  add_checksum(main_state);
-
   //FIXME: Why is this needed?
   main_state->tile->cur_pic->poc = main_state->global->poc;
 }
@@ -1817,9 +1825,20 @@ static void encoder_state_write_bitstream(encoder_state * const main_state) {
 
 void encode_one_frame(encoder_state * const main_state)
 {
-  encoder_state_new_frame(main_state);
-  encoder_state_encode(main_state);
-  encoder_state_write_bitstream(main_state);
+  {
+    PERFORMANCE_MEASURE_START();
+    encoder_state_new_frame(main_state);
+    PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=new_frame,frame=%d", main_state->global->frame);
+  }
+  {
+    PERFORMANCE_MEASURE_START();
+    encoder_state_encode(main_state);
+    PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=encode,frame=%d", main_state->global->frame);
+  }
+  {
+    // NOTE(review): no measurement around this call; presumably it is timed elsewhere - confirm.
+    encoder_state_write_bitstream(main_state);
+  }
 }
 
 static void fill_after_frame(unsigned height, unsigned array_width,
diff --git a/tools/plot-threadqueue-log.py b/tools/plot-threadqueue-log.py
index 4a11e358..f1c181bd 100644
--- a/tools/plot-threadqueue-log.py
+++ b/tools/plot-threadqueue-log.py
@@ -98,6 +98,101 @@ class LogThread:
   def plot(self, ax, i):
     ax.barh(i, self._stop - self._start, left=self._start, height=0.9, align='center',label="test", color='yellow')
 
+
+class IntervalThreadCounter:
+  """Counts how many of the added (start, stop) intervals are open at each point."""
+  def __init__(self):
+    self.interval_starts = []
+    self.interval_stops = []
+
+  def add_interval(self, start, stop):
+    #No sorting needed here: get_values_xd() sorts the merged events itself
+    self.interval_starts.append(start)
+    self.interval_stops.append(stop)
+
+  def get_values_xd(self):
+    #Sorted (position, '+' or '-') events; at equal positions '+' sorts before '-'
+    xds = sorted([(x,'+') for x in self.interval_starts] + [(x,'-') for x in self.interval_stops])
+    return xds
+
+  def get_values_x(self):
+    #Every event position appears twice, pairing with the two counts from get_values_y()
+    xs = []
+    for x in self.get_values_xd():
+      xs.append(x[0])
+      xs.append(x[0])
+    return xs
+
+  def get_values_y(self):
+    #Number of open intervals just before and just after each event
+    xds = self.get_values_xd()
+    ys = []
+    counter = 0
+    for xd in xds:
+      ys.append(counter)
+
+      if xd[1] == '+':
+        counter += 1
+      elif xd[1] == '-':
+        counter -= 1
+      else:
+        assert False
+
+      ys.append(counter)
+
+    return ys
+
+  def clamp(self, v, minval, maxval):
+    if v < minval:
+      return minval
+    if v > maxval:
+      return maxval
+    return v
+
+  def get_values_uniform_xy(self, kernel_size, steps):
+    #Mean count over a window of width kernel_size centred on each sample, sampled every kernel_size/steps
+    kernel_size=float(kernel_size)
+    xchs = self.get_values_x()
+    ychs = self.get_values_y()
+
+    #Without any interval there is nothing to sample (xchs[0] would raise IndexError)
+    if not xchs:
+      return [], []
+
+    minval = xchs[0] - kernel_size
+    maxval = xchs[-1] + kernel_size
+
+    pos = minval
+
+    xvalues = []
+    yvalues = []
+
+    while pos < maxval:
+      value = 0
+      for i in range(1,len(xchs)-1):
+        if xchs[i] < pos - kernel_size:
+          continue
+
+        v1 = self.clamp(xchs[i-1], pos - kernel_size/2., pos+kernel_size/2.)
+        v2 = self.clamp(xchs[i], pos - kernel_size/2., pos+kernel_size/2.)
+
+        diff=v2-v1
+
+        value += diff*ychs[i]/kernel_size
+
+        if xchs[i] > pos + kernel_size:
+          break
+
+      xvalues.append(pos)
+      yvalues.append(value)
+
+      pos += kernel_size/steps
+
+    return xvalues, yvalues
+
+
+
+
 class LogParser:
 
   def _parse_time(self, base, sign, value):
@@ -231,6 +326,19 @@ class LogParser:
     ax=fig.gca()
 
     yticks = {}
+
+    #first draw usage
+    itc = IntervalThreadCounter()
+    for o in self._objects:
+      if isinstance(o, LogJob) and o._is_thread_job:
+        itc.add_interval(o._start, o._stop)
+
+    #exact plot
+    ax.plot(itc.get_values_x(), [y+1.5 for y in itc.get_values_y()])
+    vx,vy = itc.get_values_uniform_xy(0.01,10)
+    ax.plot(vx, [y+1.5 for y in vy], 'r')
+
+
 
     #first draw threads
     for o in self._objects: