commit 7b9d0d62bc351efe5313eb600d21d364be5e4cfb log] tgz] author kyslov <kyslov@google.com> Fri Dec 21 11:12:26 2018 -0800 committer Fyodor Kyslov <kyslov@google.com> Wed Feb 06 19:03:07 2019 +0000 tree 3ed2d536fd5143bb4302849631fef5885ab19c17 parent 3ff6ff36eb9b2535efdc236c848973c5cae5594d diff]

AV1 RealTime path. Initial implementation Change-Id: If264acb0f6df620a2af2fd19706e023eadee5382

@@ -861,6 +861,11 @@ */ aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx); +/*!\brief usage parameter analogous to AV1 GOOD QUALITY mode. */ +#define AOM_USAGE_GOOD_QUALITY (0) +/*!\brief usage parameter analogous to AV1 REALTIME mode. */ +#define AOM_USAGE_REALTIME (1) + /*!\brief Encode a frame * * Encodes a video frame at the given "presentation time." The presentation

@@ -836,6 +836,18 @@ specialize qw/aom_highbd_sad64x16x4d sse2/; # + # Avg + # + add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p"; + specialize qw/aom_avg_8x8 sse2/; + + add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p"; + specialize qw/aom_avg_4x4 sse2/; + + add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; + specialize qw/aom_minmax_8x8 sse2/; + + # + # hadamard transform and satd for implementing temporal dependency model + # add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";

@@ -14,6 +14,40 @@ #include "config/aom_dsp_rtcd.h" #include "aom_ports/mem.h" +void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, + int *min, int *max) { + int i, j; + *min = 255; + *max = 0; + for (i = 0; i < 8; ++i, s += p, d += dp) { + for (j = 0; j < 8; ++j) { + int diff = abs(s[j] - d[j]); + *min = diff < *min ? diff : *min; + *max = diff > *max ? diff : *max; + } + } +} + +unsigned int aom_avg_4x4_c(const uint8_t *s, int p) { + int i, j; + int sum = 0; + for (i = 0; i < 4; ++i, s += p) + for (j = 0; j < 4; sum += s[j], ++j) { + } + + return (sum + 8) >> 4; +} + +unsigned int aom_avg_8x8_c(const uint8_t *s, int p) { + int i, j; + int sum = 0; + for (i = 0; i < 8; ++i, s += p) + for (j = 0; j < 8; sum += s[j], ++j) { + } + + return (sum + 32) >> 6; +} + // src_diff: first pass, 9 bit, dynamic range [-255, 255] // second pass, 12 bit, dynamic range [-2040, 2040] static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,

@@ -16,6 +16,129 @@ #include "aom_dsp/x86/bitdepth_conversion_sse2.h" #include "aom_ports/mem.h" +void aom_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, + int *min, int *max) { + __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff; + u0 = _mm_setzero_si128(); + // Row 0 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff0 = _mm_max_epi16(diff, negdiff); + // Row 1 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(absdiff0, absdiff); + minabsdiff = _mm_min_epi16(absdiff0, absdiff); + // Row 2 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 2 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 3 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 3 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 4 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 4 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = 
_mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 5 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 5 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 6 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 6 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + // Row 7 + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0); + d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 7 * dp)), u0); + diff = _mm_subs_epi16(s0, d0); + negdiff = _mm_subs_epi16(u0, diff); + absdiff = _mm_max_epi16(diff, negdiff); + maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); + minabsdiff = _mm_min_epi16(minabsdiff, absdiff); + + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_si128(maxabsdiff, 8)); + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 32)); + maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 16)); + *max = _mm_extract_epi16(maxabsdiff, 0); + + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_si128(minabsdiff, 8)); + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 32)); + minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 16)); + *min = _mm_extract_epi16(minabsdiff, 0); +} + +unsigned int aom_avg_8x8_sse2(const uint8_t *s, int p) { + __m128i s0, s1, u0; + unsigned int avg = 0; + u0 = _mm_setzero_si128(); + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); + s1 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + + s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8)); + s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32)); + s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16)); + avg = _mm_extract_epi16(s0, 0); + return (avg + 32) >> 6; +} + +unsigned int aom_avg_4x4_sse2(const uint8_t *s, int p) { + __m128i s0, s1, u0; + unsigned int avg = 0; + u0 = _mm_setzero_si128(); + s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); + s0 = _mm_adds_epu16(s0, s1); + + s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4)); + s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16)); + avg = _mm_extract_epi16(s0, 0); + return (avg + 8) >> 4; +} + static void hadamard_col8_sse2(__m128i *in, int iter) { __m128i a0 = in[0]; __m128i a1 = in[1];

@@ -154,6 +154,8 @@ ARG_DEF(NULL, "skip", 1, "Skip the first n input frames"); static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, "Use Good Quality Deadline"); +static const arg_def_t rt_dl = + ARG_DEF(NULL, "rt", 0, "Use Realtime Quality Deadline"); static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, "Do not print encode progress"); static const arg_def_t verbosearg = @@ -219,6 +221,7 @@ &limit, &skip, &good_dl, + &rt_dl, &quietarg, &verbosearg, &psnrarg, @@ -1064,7 +1067,9 @@ } else if (arg_match(&arg, &usage, argi)) global->usage = arg_parse_uint(&arg); else if (arg_match(&arg, &good_dl, argi)) - warn("Deprecated --good option! Ignoring

"); + global->usage = AOM_USAGE_GOOD_QUALITY; // Good quality usage + else if (arg_match(&arg, &rt_dl, argi)) + global->usage = AOM_USAGE_REALTIME; // Real-time usage else if (arg_match(&arg, &use_yv12, argi)) global->color_type = YV12; else if (arg_match(&arg, &use_i420, argi)) @@ -1117,11 +1122,19 @@ // Make default AV1 passes = 2 until there is a better quality 1-pass // encoder if (global->codec != NULL && global->codec->name != NULL) - global->passes = (strcmp(global->codec->name, "av1") == 0) ? 2 : 1; + global->passes = (strcmp(global->codec->name, "av1") == 0 && + global->usage != AOM_USAGE_REALTIME) + ? 2 + : 1; #else global->passes = 1; #endif } + + if (global->usage == AOM_USAGE_REALTIME && global->passes > 1) { + warn("Enforcing one-pass encoding in realtime mode

"); + global->passes = 1; + } } static void open_input_file(struct AvxInputContext *input, @@ -1355,6 +1368,12 @@ config->cfg.g_error_resilient = arg_parse_uint(&arg); } else if (arg_match(&arg, &lag_in_frames, argi)) { config->cfg.g_lag_in_frames = arg_parse_uint(&arg); + if (global->usage == AOM_USAGE_REALTIME && + config->cfg.rc_end_usage == AOM_CBR && + config->cfg.g_lag_in_frames != 0) { + warn("non-zero %s option ignored in realtime CBR mode.

", arg.name); + config->cfg.g_lag_in_frames = 0; + } } else if (arg_match(&arg, &large_scale_tile, argi)) { config->cfg.large_scale_tile = arg_parse_uint(&arg); if (config->cfg.large_scale_tile) global->codec = get_aom_lst_encoder();

@@ -194,6 +194,8 @@ "${AOM_ROOT}/av1/encoder/tpl_model.c" "${AOM_ROOT}/av1/encoder/tpl_model.h" "${AOM_ROOT}/av1/encoder/wedge_utils.c" + "${AOM_ROOT}/av1/encoder/var_based_part.c" + "${AOM_ROOT}/av1/encoder/var_based_part.h" "${AOM_ROOT}/third_party/fastfeat/fast.c" "${AOM_ROOT}/third_party/fastfeat/fast.h" "${AOM_ROOT}/third_party/fastfeat/fast_9.c"

@@ -295,6 +295,7 @@ RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1); RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTAQ_MODE_COUNT - 1); RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1); + RANGE_CHECK_HI(cfg, g_usage, 1); RANGE_CHECK_HI(cfg, g_threads, MAX_NUM_THREADS); RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS); RANGE_CHECK(cfg, rc_end_usage, AOM_VBR, AOM_Q); @@ -498,6 +499,7 @@ oxcf->profile = cfg->g_profile; oxcf->fwd_kf_enabled = cfg->fwd_kf_enabled; oxcf->max_threads = (int)cfg->g_threads; + oxcf->mode = (cfg->g_usage == 1) ? REALTIME : GOOD; oxcf->width = cfg->g_w; oxcf->height = cfg->g_h; oxcf->forced_max_frame_width = cfg->g_forced_max_frame_width; @@ -540,7 +542,6 @@ oxcf->init_framerate = 30; oxcf->timing_info_present = 0; } - oxcf->mode = GOOD; oxcf->cfg = &cfg->cfg; switch (cfg->g_pass) { @@ -1507,8 +1508,7 @@ } } } - - if (ctx->oxcf.mode != GOOD) { + if (ctx->oxcf.mode != GOOD && ctx->oxcf.mode != REALTIME) { ctx->oxcf.mode = GOOD; av1_change_config(ctx->cpi, &ctx->oxcf); } @@ -2093,7 +2093,7 @@ { 0, { // NOLINT - 0, // g_usage + 0, // g_usage - non-realtime usage 0, // g_threads 0, // g_profile @@ -2158,6 +2158,74 @@ { 0 }, // tile_heights { 1 }, // config file } }, + { 1, + { + // NOLINT + 1, // g_usage - real-time usage + 0, // g_threads + 0, // g_profile + + 320, // g_width + 240, // g_height + 0, // g_limit + 0, // g_forced_max_frame_width + 0, // g_forced_max_frame_height + AOM_BITS_8, // g_bit_depth + 8, // g_input_bit_depth + + { 1, 30 }, // g_timebase + + 0, // g_error_resilient + + AOM_RC_ONE_PASS, // g_pass + + 1, // g_lag_in_frames + + 0, // rc_dropframe_thresh + RESIZE_NONE, // rc_resize_mode + SCALE_NUMERATOR, // rc_resize_denominator + SCALE_NUMERATOR, // rc_resize_kf_denominator + + 0, // rc_superres_mode + SCALE_NUMERATOR, // rc_superres_denominator + SCALE_NUMERATOR, // rc_superres_kf_denominator + 63, // rc_superres_qthresh + 32, // rc_superres_kf_qthresh + + AOM_CBR, // rc_end_usage + { NULL, 0 }, // 
rc_twopass_stats_in + { NULL, 0 }, // rc_firstpass_mb_stats_in + 256, // rc_target_bandwidth + 0, // rc_min_quantizer + 63, // rc_max_quantizer + 25, // rc_undershoot_pct + 25, // rc_overshoot_pct + + 6000, // rc_max_buffer_size + 4000, // rc_buffer_initial_size + 5000, // rc_buffer_optimal_size + + 50, // rc_two_pass_vbrbias + 0, // rc_two_pass_vbrmin_section + 2000, // rc_two_pass_vbrmax_section + + // keyframing settings (kf) + 0, // fwd_kf_enabled + AOM_KF_AUTO, // g_kfmode + 0, // kf_min_dist + 9999, // kf_max_dist + 0, // sframe_dist + 1, // sframe_mode + 0, // large_scale_tile + 0, // monochrome + 0, // full_still_picture_hdr + 0, // save_as_annexb + 0, // tile_width_count + 0, // tile_height_count + { 0 }, // tile_widths + { 0 }, // tile_heights + { 1 }, // config file + } }, }; #ifndef VERSION_STRING @@ -2181,7 +2249,7 @@ }, { // NOLINT - 1, // 1 cfg map + 2, // 2 cfg map encoder_usage_cfg_map, // aom_codec_enc_cfg_map_t encoder_encode, // aom_codec_encode_fn_t encoder_get_cxdata, // aom_codec_get_cx_data_fn_t

@@ -60,6 +60,7 @@ #include "av1/encoder/reconinter_enc.h" #include "av1/encoder/segmentation.h" #include "av1/encoder/tokenize.h" +#include "av1/encoder/var_based_part.h" static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data, ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, @@ -74,7 +75,7 @@ // purposes of activity masking. // Eventually this should be replaced by custom no-reference routines, // which will be faster. -static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = { +const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -221,18 +222,6 @@ return BLOCK_8X8; } -// Lighter version of set_offsets that only sets the mode info -// pointers. -static void set_mode_info_offsets(const AV1_COMP *const cpi, - MACROBLOCK *const x, MACROBLOCKD *const xd, - int mi_row, int mi_col) { - const AV1_COMMON *const cm = &cpi->common; - const int idx_str = xd->mi_stride * mi_row + mi_col; - xd->mi = cm->mi_grid_visible + idx_str; - xd->mi[0] = cm->mi + idx_str; - x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); -} - static void set_offsets_without_segment_id(const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, @@ -524,11 +513,11 @@ return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd); } -static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data, - MACROBLOCK *const x, int mi_row, int mi_col, - RD_STATS *rd_cost, PARTITION_TYPE partition, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd) { +static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data, + MACROBLOCK *const x, int mi_row, int mi_col, + RD_STATS *rd_cost, PARTITION_TYPE partition, + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, + int64_t best_rd, int use_nonrd_pick_mode) { 
AV1_COMMON *const cm = &cpi->common; const int num_planes = av1_num_planes(cm); TileInfo *const tile_info = &tile_data->tile_info; @@ -657,8 +646,15 @@ ctx->seg_feat = 1; #endif } else { - av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, - bsize, ctx, best_rd); + // TODO(kyslov): do the same for pick_intra_mode and + // pick_inter_mode_sb_seg_skip + if (use_nonrd_pick_mode) { + av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, + bsize, ctx, best_rd); + } else { + av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, + bsize, ctx, best_rd); + } #if CONFIG_ONE_PASS_SVM ctx->seg_feat = 0; #endif @@ -1627,25 +1623,6 @@ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); } -// Check to see if the given partition size is allowed for a specified number -// of mi block rows and columns remaining in the image. -// If not then return the largest allowed partition size -static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, - int cols_left, int *bh, int *bw) { - if (rows_left <= 0 || cols_left <= 0) { - return AOMMIN(bsize, BLOCK_8X8); - } else { - for (; bsize > 0; bsize -= 3) { - *bh = mi_size_high[bsize]; - *bw = mi_size_wide[bsize]; - if ((*bh <= rows_left) && (*bw <= cols_left)) { - break; - } - } - } - return bsize; -} - static void set_partial_sb_partition(const AV1_COMMON *const cm, MB_MODE_INFO *mi, int bh_in, int bw_in, int mi_rows_remaining, @@ -1769,8 +1746,8 @@ if (partition != PARTITION_NONE && !splits_below && mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, - PARTITION_NONE, bsize, ctx_none, INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, + PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0); if (none_rdc.rate < INT_MAX) { none_rdc.rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1798,13 +1775,13 @@ } switch (partition) { 
case PARTITION_NONE: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_NONE, bsize, ctx_none, INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0); break; case PARTITION_HORZ: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[0], - INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX, + 0); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + hbs < cm->mi_rows) { RD_STATS tmp_rdc; @@ -1813,9 +1790,9 @@ update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, NULL); - rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[1], - INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, + PARTITION_HORZ, subsize, &pc_tree->horizontal[1], + INT64_MAX, 0); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_invalid_rd_stats(&last_part_rdc); break; @@ -1826,9 +1803,9 @@ } break; case PARTITION_VERT: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[0], - INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX, + 0); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + hbs < cm->mi_cols) { RD_STATS tmp_rdc; @@ -1837,9 +1814,9 @@ update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1); encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, NULL); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, - PARTITION_VERT, subsize, - &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); + pick_sb_modes(cpi, 
tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, + PARTITION_VERT, subsize, + &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 0); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_invalid_rd_stats(&last_part_rdc); break; @@ -1913,9 +1890,9 @@ save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); pc_tree->split[i]->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, - &tmp_rdc, PARTITION_SPLIT, split_subsize, - &pc_tree->split[i]->none, INT64_MAX); + pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, + PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none, + INT64_MAX, 0); restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { @@ -1976,6 +1953,186 @@ *dist = chosen_rdc.dist; } +// TODO(kyslov): now this is very similar to rd_use_partition (except that +// doesn't do extra search arounf suggested partitioning) +// consider passing a flag to select non-rd path (similar to +// encode_sb_row) +static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td, + TileDataEnc *tile_data, MB_MODE_INFO **mib, + TOKENEXTRA **tp, int mi_row, int mi_col, + BLOCK_SIZE bsize, int *rate, int64_t *dist, + int do_recon, PC_TREE *pc_tree) { + AV1_COMMON *const cm = &cpi->common; + const int num_planes = av1_num_planes(cm); + TileInfo *const tile_info = &tile_data->tile_info; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bs = mi_size_wide[bsize]; + const int hbs = bs / 2; + int i; + const int pl = (bsize >= BLOCK_8X8) + ? partition_plane_context(xd, mi_row, mi_col, bsize) + : 0; + const PARTITION_TYPE partition = + (bsize >= BLOCK_8X8) ? 
get_partition(cm, mi_row, mi_col, bsize) + : PARTITION_NONE; + const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); + RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; + RD_STATS last_part_rdc; + PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + + assert(mi_size_wide[bsize] == mi_size_high[bsize]); + + av1_invalid_rd_stats(&last_part_rdc); + + pc_tree->partitioning = partition; + + xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col; + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); + + if (bsize == BLOCK_16X16 && cpi->vaq_refresh) { + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + x->mb_energy = av1_log_block_var(cpi, x, bsize); + } + + for (int b = 0; b < 2; ++b) { + pc_tree->horizontal[b].skip_ref_frame_mask = 0; + pc_tree->vertical[b].skip_ref_frame_mask = 0; + } + for (int b = 0; b < 3; ++b) { + pc_tree->horizontala[b].skip_ref_frame_mask = 0; + pc_tree->horizontalb[b].skip_ref_frame_mask = 0; + pc_tree->verticala[b].skip_ref_frame_mask = 0; + pc_tree->verticalb[b].skip_ref_frame_mask = 0; + } + for (int b = 0; b < 4; ++b) { + pc_tree->horizontal4[b].skip_ref_frame_mask = 0; + pc_tree->vertical4[b].skip_ref_frame_mask = 0; + } + switch (partition) { + case PARTITION_NONE: + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1); + break; + case PARTITION_HORZ: + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX, + 1); + if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && + mi_row + hbs < cm->mi_rows) { + RD_STATS tmp_rdc; + const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0]; + av1_init_rd_stats(&tmp_rdc); + update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); + encode_superblock(cpi, tile_data, td, 
tp, DRY_RUN_NORMAL, mi_row, + mi_col, subsize, NULL); + pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, + PARTITION_HORZ, subsize, &pc_tree->horizontal[1], + INT64_MAX, 1); + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { + av1_invalid_rd_stats(&last_part_rdc); + break; + } + last_part_rdc.rate += tmp_rdc.rate; + last_part_rdc.dist += tmp_rdc.dist; + last_part_rdc.rdcost += tmp_rdc.rdcost; + } + break; + case PARTITION_VERT: + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, + PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX, + 1); + if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && + mi_col + hbs < cm->mi_cols) { + RD_STATS tmp_rdc; + const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0]; + av1_init_rd_stats(&tmp_rdc); + update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1); + encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, + mi_col, subsize, NULL); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, + PARTITION_VERT, subsize, + &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1); + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { + av1_invalid_rd_stats(&last_part_rdc); + break; + } + last_part_rdc.rate += tmp_rdc.rate; + last_part_rdc.dist += tmp_rdc.dist; + last_part_rdc.rdcost += tmp_rdc.rdcost; + } + break; + case PARTITION_SPLIT: + last_part_rdc.rate = 0; + last_part_rdc.dist = 0; + last_part_rdc.rdcost = 0; + for (i = 0; i < 4; i++) { + int x_idx = (i & 1) * hbs; + int y_idx = (i >> 1) * hbs; + int jj = i >> 1, ii = i & 0x01; + RD_STATS tmp_rdc; + if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) + continue; + + av1_init_rd_stats(&tmp_rdc); + nonrd_use_partition( + cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp, + mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, + &tmp_rdc.dist, i != 3, pc_tree->split[i]); + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { + 
av1_invalid_rd_stats(&last_part_rdc); + break; + } + last_part_rdc.rate += tmp_rdc.rate; + last_part_rdc.dist += tmp_rdc.dist; + } + break; + case PARTITION_VERT_A: + case PARTITION_VERT_B: + case PARTITION_HORZ_A: + case PARTITION_HORZ_B: + case PARTITION_HORZ_4: + case PARTITION_VERT_4: + assert(0 && "Cannot handle extended partition types"); + default: assert(0); break; + } + + if (last_part_rdc.rate < INT_MAX) { + last_part_rdc.rate += x->partition_cost[pl][partition]; + last_part_rdc.rdcost = + RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); + } + + restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); + + // We must have chosen a partitioning and encoding or we'll fail later on. + // No other opportunities for success. + if (bsize == cm->seq_params.sb_size) + assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX); + + if (do_recon) { + if (bsize == cm->seq_params.sb_size) { + // NOTE: To get estimate for rate due to the tokens, use: + // int rate_coeffs = 0; + // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, + // bsize, pc_tree, &rate_coeffs); + x->cb_offset = 0; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, + pc_tree, NULL); + } else { + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, + pc_tree, NULL); + } + } + + *rate = last_part_rdc.rate; + *dist = last_part_rdc.dist; +} + /* clang-format off */ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = { BLOCK_4X4, // 4x4 @@ -2405,9 +2562,9 @@ ? 
INT64_MAX : (best_rdc->rdcost - sum_rdc->rdcost); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, - RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx, - rdcost_remaining); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, + RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx, + rdcost_remaining, 0); if (this_rdc->rate == INT_MAX) { sum_rdc->rdcost = INT64_MAX; @@ -2623,8 +2780,8 @@ const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX ? INT64_MAX : (best_rdc.rdcost - partition_rd_cost); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_NONE, bsize, ctx_none, best_remain_rdcost); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE, + bsize, ctx_none, best_remain_rdcost, 0); pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost; pc_tree->pc_tree_stats.skip = ctx_none->skip; @@ -4103,8 +4260,8 @@ partition_attempts[PARTITION_NONE] += 1; } #endif - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_NONE, bsize, ctx_none, best_remain_rdcost); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE, + bsize, ctx_none, best_remain_rdcost, 0); pb_source_variance = x->source_variance; if (none_rd) *none_rd = this_rdc.rdcost; cur_none_rd = this_rdc.rdcost; @@ -4433,9 +4590,8 @@ partition_attempts[PARTITION_HORZ] += 1; } #endif - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[0], - best_remain_rdcost); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ, + subsize, &pc_tree->horizontal[0], best_remain_rdcost, 0); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -4465,9 +4621,9 @@ pc_tree->horizontal[1].pred_interp_filter = av1_extract_interp_filter(ctx_h->mic.interp_filters, 0); } - rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[1], - best_rdc.rdcost - sum_rdc.rdcost); + 
pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, + PARTITION_HORZ, subsize, &pc_tree->horizontal[1], + best_rdc.rdcost - sum_rdc.rdcost, 0); horz_rd[1] = this_rdc.rdcost; if (this_rdc.rate == INT_MAX) { @@ -4515,9 +4671,8 @@ partition_attempts[PARTITION_VERT] += 1; } #endif - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[0], - best_remain_rdcost); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT, + subsize, &pc_tree->vertical[0], best_remain_rdcost, 0); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -4546,9 +4701,9 @@ pc_tree->vertical[1].pred_interp_filter = av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); } - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[1], - best_rdc.rdcost - sum_rdc.rdcost); + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, + PARTITION_VERT, subsize, &pc_tree->vertical[1], + best_rdc.rdcost - sum_rdc.rdcost, 0); vert_rd[1] = this_rdc.rdcost; if (this_rdc.rate == INT_MAX) { @@ -5531,9 +5686,8 @@ CFL_ALPHABET_SIZE); } -static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, - TileDataEnc *tile_data, int mi_row, - TOKENEXTRA **tp) { +static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, + int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) { AV1_COMMON *const cm = &cpi->common; const int num_planes = av1_num_planes(cm); const TileInfo *const tile_info = &tile_data->tile_info; @@ -5642,6 +5796,13 @@ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, &dummy_rate, &dummy_dist, 1, pc_root); + } else if (sf->partition_search_type == VAR_BASED_PARTITION && + use_nonrd_mode) { + set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); + av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col); + 
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, + &dummy_rate, &dummy_dist, 1, pc_root); + } else { const int orig_rdmult = cpi->rd.RDMULT; x->cb_rdmult = orig_rdmult; @@ -5812,7 +5973,7 @@ cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes); cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok; - encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); + encode_sb_row(cpi, td, this_tile, mi_row, &tok, cpi->sf.use_nonrd_pick_mode); cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok; cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =

@@ -71,6 +71,7 @@ #include "av1/encoder/segmentation.h" #include "av1/encoder/speed_features.h" #include "av1/encoder/reconinter_enc.h" +#include "av1/encoder/var_based_part.h" #define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7 @@ -4729,6 +4730,9 @@ } av1_set_quantizer(cm, q); av1_init_quantizer(cpi); + + av1_set_variance_partition_thresholds(cpi, q, 0); + // printf("Frame %d/%d: q = %d, frame_type = %d superres_denom = %d

", // cm->current_frame.frame_number, cm->show_frame, q, // cm->current_frame.frame_type, cm->superres_scale_denominator);

@@ -24,6 +24,8 @@ #include "av1/common/onyxc_int.h" #include "av1/common/resize.h" #include "av1/common/timing.h" +#include "av1/common/blockd.h" +#include "av1/common/enums.h" #include "av1/encoder/aq_cyclicrefresh.h" #include "av1/encoder/av1_quantize.h" #include "av1/encoder/context_tree.h" @@ -36,6 +38,7 @@ #include "av1/encoder/rd.h" #include "av1/encoder/speed_features.h" #include "av1/encoder/tokenize.h" +#include "av1/encoder/block.h" #if CONFIG_INTERNAL_STATS #include "aom_dsp/ssim.h" @@ -85,7 +88,10 @@ enum { // Good Quality Fast Encoding. The encoder balances quality with the amount of // time it takes to encode the output. Speed setting controls how fast. - GOOD + GOOD, + // Realtime Fast Encoding. Will force some restrictions on bitrate + // constraints. + REALTIME } UENUM1BYTE(MODE); enum { @@ -132,6 +138,16 @@ SUPERRES_MODES } UENUM1BYTE(SUPERRES_MODE); +typedef enum { + kInvalid = 0, + kLowSadLowSumdiff = 1, + kLowSadHighSumdiff = 2, + kHighSadLowSumdiff = 3, + kHighSadHighSumdiff = 4, + kLowVarHighSumdiff = 5, + kVeryHighSad = 6, +} CONTENT_STATE_SB; + typedef struct TplDepStats { int64_t intra_cost; int64_t inter_cost; @@ -825,6 +841,16 @@ // VARIANCE_AQ segment map refresh int vaq_refresh; + // VAR_BASED_PARTITION thresholds + // 0 - threshold_128x128; 1 - threshold_64x64; + // 2 - threshold_32x32; 3 - threshold_16x16; + // 4 - vbp_threshold_8x8; + int64_t vbp_thresholds[5]; + int64_t vbp_threshold_minmax; + int64_t vbp_threshold_sad; + int64_t vbp_threshold_copy; + BLOCK_SIZE vbp_bsize_min; + // Multi-threading int num_workers; AVxWorker *workers; @@ -1113,6 +1139,39 @@ cm->current_frame.frame_type == KEY_FRAME); } +// Lighter version of set_offsets that only sets the mode info +// pointers. 
+static INLINE void set_mode_info_offsets(const AV1_COMP *const cpi, + MACROBLOCK *const x, + MACROBLOCKD *const xd, int mi_row, + int mi_col) { + const AV1_COMMON *const cm = &cpi->common; + const int idx_str = xd->mi_stride * mi_row + mi_col; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; + x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); +} + +// Check to see if the given partition size is allowed for a specified number +// of mi block rows and columns remaining in the image. +// If not then return the largest allowed partition size +static INLINE BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, + int cols_left, int *bh, int *bw) { + int int_size = (int)bsize; + if (rows_left <= 0 || cols_left <= 0) { + return AOMMIN(bsize, BLOCK_8X8); + } else { + for (; int_size > 0; int_size -= 3) { + *bh = mi_size_high[int_size]; + *bw = mi_size_wide[int_size]; + if ((*bh <= rows_left) && (*bw <= cols_left)) { + break; + } + } + } + return (BLOCK_SIZE)int_size; +} + // Returns a Sequence Header OBU stored in an aom_fixed_buf_t, or NULL upon // failure. When a non-NULL aom_fixed_buf_t pointer is returned by this // function, the memory must be freed by the caller. Both the buf member of the

@@ -11107,6 +11107,121 @@ } x->comp_rd_stats_idx = 0; } +// TODO(kyslov): now this is very similar to set_params_rd_pick_inter_mode +// (except that doesn't set ALTREF parameters) +// consider passing a flag to select non-rd path (similar to +// encode_sb_row) +static void set_params_nonrd_pick_inter_mode( + const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args, + BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask, + int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES], + unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES], + struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) { + const AV1_COMMON *const cm = &cpi->common; + const int num_planes = av1_num_planes(cm); + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = xd->mi[0]; + MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; + unsigned char segment_id = mbmi->segment_id; + int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; + int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, + MAX_SB_SIZE >> 1 }; + int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, + MAX_SB_SIZE >> 1 }; + int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; + + for (int i = 0; i < MB_MODE_COUNT; ++i) + for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf); + args->above_pred_buf[1] = + CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len); + args->above_pred_buf[2] = + CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len); + args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf); + args->left_pred_buf[1] = + CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len); + args->left_pred_buf[2] = + CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len); + } else { + args->above_pred_buf[0] = 
x->above_pred_buf; + args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1); + args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE; + args->left_pred_buf[0] = x->left_pred_buf; + args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1); + args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE; + } + + av1_collect_neighbors_ref_counts(xd); + + estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single, + ref_costs_comp); + + MV_REFERENCE_FRAME ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + x->pred_mv_sad[ref_frame] = INT_MAX; + x->mbmi_ext->mode_context[ref_frame] = 0; + x->mbmi_ext->compound_mode_context[ref_frame] = 0; + mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX; + if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) { + if (mbmi->partition != PARTITION_NONE && + mbmi->partition != PARTITION_SPLIT) { + if (skip_ref_frame_mask & (1 << ref_frame)) { + int skip = 1; + for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) { + if (!(skip_ref_frame_mask & (1 << r))) { + const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES]; + if (rf[0] == ref_frame || rf[1] == ref_frame) { + skip = 0; + break; + } + } + } + if (skip) continue; + } + } + assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL); + setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, + yv12_mb); + } + } + + av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col); + + if (check_num_overlappable_neighbors(mbmi) && + is_motion_variation_allowed_bsize(bsize)) { + av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, + args->above_pred_buf, dst_width1, + dst_height1, args->above_pred_stride); + av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, + args->left_pred_buf, dst_width2, + dst_height2, args->left_pred_stride); + av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, + 0, num_planes); + calc_target_weighted_pred( + cm, x, xd, mi_row, mi_col, 
args->above_pred_buf[0], + args->above_pred_stride[0], args->left_pred_buf[0], + args->left_pred_stride[0]); + } + + init_mode_skip_mask(mode_skip_mask, cpi, x, bsize); + + if (cpi->sf.tx_type_search.fast_intra_tx_type_search) + x->use_default_intra_tx_type = 1; + else + x->use_default_intra_tx_type = 0; + + if (cpi->sf.tx_type_search.fast_inter_tx_type_search) + x->use_default_inter_tx_type = 1; + else + x->use_default_inter_tx_type = 0; + if (cpi->sf.skip_repeat_interpolation_filter_search) { + x->interp_filter_stats_idx[0] = 0; + x->interp_filter_stats_idx[1] = 0; + } +} static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, RD_STATS *rd_cost, @@ -12582,7 +12697,6 @@ #endif } } - // In effect only when speed >= 2. sf_refine_fast_tx_type_search( cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index, @@ -12594,7 +12708,6 @@ search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi, ref_costs_single, &search_state); } - search_state.best_mbmode.skip_mode = 0; if (cm->current_frame.skip_mode_info.skip_mode_flag && !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && @@ -12672,6 +12785,535 @@ } } +// TODO(kyslov): now this is very similar to av1_rd_pick_inter_mode_sb except: +// it only checks non-compound mode and +// it doesn't check palette mode +// it doesn't refine tx search +// this function is likely to be heavily modified with nonrd mode +// decision +void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data, + MACROBLOCK *x, int mi_row, int mi_col, + RD_STATS *rd_cost, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { + AV1_COMMON *const cm = &cpi->common; + const int num_planes = av1_num_planes(cm); + const SPEED_FEATURES *const sf = &cpi->sf; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; + PREDICTION_MODE this_mode; + unsigned char segment_id = mbmi->segment_id; 
+ int i; + struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; + unsigned int ref_costs_single[REF_FRAMES]; + unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES]; + int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)]; + mode_skip_mask_t mode_skip_mask; + uint8_t motion_mode_skip_mask = 0; // second pass of single ref modes +#if CONFIG_ONE_PASS_SVM + int temp_y_eob = 0, temp_y_eob_0 = 0, temp_y_eob_1 = 0, temp_y_eob_2 = 0, + temp_y_eob_3 = 0; + int64_t temp_y_rd = 0, temp_y_rd_0 = 0, temp_y_rd_1 = 0, temp_y_rd_2 = 0, + temp_y_rd_3 = 0; +#endif + + InterModeSearchState search_state; + init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize, + best_rd_so_far); + INTERINTRA_MODE interintra_modes[REF_FRAMES] = { + INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, + INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES + }; + HandleInterModeArgs args = { + { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, + { NULL }, { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 }, + NULL, NULL, + NULL, search_state.modelled_rd, + { { 0 } }, INT_MAX, + INT_MAX, search_state.simple_rd, + 0, interintra_modes, + 1, NULL + }; + for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; + + av1_invalid_rd_stats(rd_cost); + + // init params, set frame modes, speed features + set_params_nonrd_pick_inter_mode(cpi, x, &args, bsize, mi_row, mi_col, + &mode_skip_mask, ctx->skip_ref_frame_mask, + ref_costs_single, ref_costs_comp, yv12_mb); + + int64_t best_est_rd = INT64_MAX; + // TODO(angiebird): Turn this on when this speed feature is well tested + const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize]; + const int do_tx_search = + !((cpi->sf.inter_mode_rd_model_estimation == 1 && md->ready) || + (cpi->sf.inter_mode_rd_model_estimation == 2 && + x->source_variance < 512)); + InterModesInfo *inter_modes_info = x->inter_modes_info; + inter_modes_info->num = 0; + + int intra_mode_num = 0; + int 
intra_mode_idx_ls[MAX_MODES]; + int reach_first_comp_mode = 0; + + // Temporary buffers used by handle_inter_mode(). + uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]); + + CompoundTypeRdBuffers rd_buffers; + alloc_compound_type_rd_buffers(cm, &rd_buffers); + + for (int midx = 0; midx < LAST_SINGLE_REF_MODES + 1; ++midx) { + const MODE_DEFINITION *mode_order = &av1_mode_order[midx]; + this_mode = mode_order->mode; + const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0]; + const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1]; + const int comp_pred = second_ref_frame > INTRA_FRAME; + + if (ref_frame > LAST_FRAME) continue; + + // When single ref motion search ends: + // 1st pass: To evaluate single ref RD results and rewind to the beginning; + // 2nd pass: To continue with compound ref search. + if (sf->prune_single_motion_modes_by_simple_trans) { + if (comp_pred && args.single_ref_first_pass) { + args.single_ref_first_pass = 0; + // Reach the first comp ref mode + // Reset midx to start the 2nd pass for single ref motion search + midx = -1; + motion_mode_skip_mask = analyze_simple_trans_states(cpi, x); + continue; + } + if (!comp_pred) { // single ref mode + if (args.single_ref_first_pass) { + // clear stats + for (int k = 0; k < MAX_REF_MV_SERCH; ++k) { + x->simple_rd_state[midx][k].rd_stats.rdcost = INT64_MAX; + x->simple_rd_state[midx][k].early_skipped = 0; + } + } else { + if (motion_mode_skip_mask & (1 << ref_frame)) { + continue; + } + } + } + } + + // Reach the first compound prediction mode + if (sf->prune_comp_search_by_single_result > 0 && comp_pred && + reach_first_comp_mode == 0) { + analyze_single_states(cpi, &search_state); + reach_first_comp_mode = 1; + } + int64_t this_rd = INT64_MAX; + int disable_skip = 0; + int rate2 = 0, rate_y = 0, rate_uv = 0; + int64_t distortion2 = 0; + int skippable = 0; + int this_skip2 = 0; + + init_mbmi(mbmi, midx, cm); + + x->skip = 0; + set_ref_ptrs(cm, xd, ref_frame, 
second_ref_frame); + + if (inter_mode_compatible_skip(cpi, x, bsize, midx)) continue; + + const int ret = inter_mode_search_order_independent_skip( + cpi, ctx, x, bsize, midx, mi_row, mi_col, &mode_skip_mask, + &search_state); + if (ret == 1) continue; + args.skip_motion_mode = (ret == 2); + + if (sf->drop_ref && comp_pred) { + if (sf_check_is_drop_ref(mode_order, &search_state)) { + continue; + } + } + + if (search_state.best_rd < search_state.mode_threshold[midx]) continue; + + if (sf->prune_comp_search_by_single_result > 0 && comp_pred) { + if (compound_skip_by_single_states(cpi, &search_state, this_mode, + ref_frame, second_ref_frame, x)) + continue; + } + + const int ref_frame_cost = comp_pred + ? ref_costs_comp[ref_frame][second_ref_frame] + : ref_costs_single[ref_frame]; + const int compmode_cost = + is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0; + const int real_compmode_cost = + cm->current_frame.reference_mode == REFERENCE_MODE_SELECT + ? compmode_cost + : 0; + + if (comp_pred) { + if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + search_state.best_mode_index >= 0 && + search_state.best_mbmode.ref_frame[0] == INTRA_FRAME) + continue; + } + + if (ref_frame == INTRA_FRAME) { + if (!cpi->oxcf.enable_smooth_intra && + (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED || + mbmi->mode == SMOOTH_V_PRED)) + continue; + if (!cpi->oxcf.enable_paeth_intra && mbmi->mode == PAETH_PRED) continue; + if (sf->adaptive_mode_search > 1) + if ((x->source_variance << num_pels_log2_lookup[bsize]) > + search_state.best_pred_sse) + continue; + + if (this_mode != DC_PRED) { + // Only search the oblique modes if the best so far is + // one of the neighboring directional modes + if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && + (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) { + if (search_state.best_mode_index >= 0 && + search_state.best_mbmode.ref_frame[0] > INTRA_FRAME) + continue; + } + if 
(sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { + if (conditional_skipintra(this_mode, search_state.best_intra_mode)) + continue; + } + } + } + + // Select prediction reference frames. + for (i = 0; i < num_planes; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } + + if (ref_frame == INTRA_FRAME) { + intra_mode_idx_ls[intra_mode_num++] = midx; + continue; + } else { + mbmi->angle_delta[PLANE_TYPE_Y] = 0; + mbmi->angle_delta[PLANE_TYPE_UV] = 0; + mbmi->filter_intra_mode_info.use_filter_intra = 0; + mbmi->ref_mv_idx = 0; + int64_t ref_best_rd = search_state.best_rd; + { + RD_STATS rd_stats, rd_stats_y, rd_stats_uv; + av1_init_rd_stats(&rd_stats); + rd_stats.rate = rate2; + + // Point to variables that are maintained between loop iterations + args.single_newmv = search_state.single_newmv; + args.single_newmv_rate = search_state.single_newmv_rate; + args.single_newmv_valid = search_state.single_newmv_valid; + args.single_comp_cost = real_compmode_cost; + args.ref_frame_cost = ref_frame_cost; + if (midx < MAX_SINGLE_REF_MODES) { + args.simple_rd_state = x->simple_rd_state[midx]; + } + this_rd = handle_inter_mode( + cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, + &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf, + &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info); + rate2 = rd_stats.rate; + skippable = rd_stats.skip; + distortion2 = rd_stats.dist; + rate_y = rd_stats_y.rate; + rate_uv = rd_stats_uv.rate; +#if CONFIG_ONE_PASS_SVM + av1_unpack_reg_stat(&rd_stats_y, &temp_y_eob, &temp_y_eob_0, + &temp_y_eob_1, &temp_y_eob_2, &temp_y_eob_3, + &temp_y_rd, &temp_y_rd_0, &temp_y_rd_1, + &temp_y_rd_2, &temp_y_rd_3); +#endif + } + + if (sf->prune_comp_search_by_single_result > 0 && + is_inter_singleref_mode(this_mode) && args.single_ref_first_pass) { + collect_single_states(x, &search_state, mbmi); + } + + if (this_rd == INT64_MAX) continue; + + 
this_skip2 = mbmi->skip; + this_rd = RDCOST(x->rdmult, rate2, distortion2); + if (this_skip2) { + rate_y = 0; + rate_uv = 0; + } + } + + // Did this mode help.. i.e. is it the new best mode + if (this_rd < search_state.best_rd || x->skip) { + int mode_excluded = 0; + if (comp_pred) { + mode_excluded = cm->current_frame.reference_mode == SINGLE_REFERENCE; + } + if (!mode_excluded) { + // Note index of best mode so far + search_state.best_mode_index = midx; + + if (ref_frame == INTRA_FRAME) { + /* required for left and above block mv */ + mbmi->mv[0].as_int = 0; + } else { + search_state.best_pred_sse = x->pred_sse[ref_frame]; + } + + rd_cost->rate = rate2; + rd_cost->dist = distortion2; + rd_cost->rdcost = this_rd; + search_state.best_rd = this_rd; + search_state.best_mbmode = *mbmi; + search_state.best_skip2 = this_skip2; + search_state.best_mode_skippable = skippable; + if (do_tx_search) { + // When do_tx_search == 0, handle_inter_mode won't provide correct + // rate_y and rate_uv because txfm_search process is replaced by + // rd estimation. + // Therfore, we should avoid updating best_rate_y and best_rate_uv + // here. 
These two values will be updated when txfm_search is called + search_state.best_rate_y = + rate_y + + x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable]; + search_state.best_rate_uv = rate_uv; + +#if CONFIG_ONE_PASS_SVM + av1_set_reg_stat(rd_cost, temp_y_eob, temp_y_eob_0, temp_y_eob_1, + temp_y_eob_2, temp_y_eob_3, temp_y_rd, temp_y_rd_0, + temp_y_rd_1, temp_y_rd_2, temp_y_rd_3); +#endif + } + memcpy(ctx->blk_skip, x->blk_skip, + sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); + } + } + + /* keep record of best compound/single-only prediction */ + if (!disable_skip && ref_frame != INTRA_FRAME) { + int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; + + if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { + single_rate = rate2 - compmode_cost; + hybrid_rate = rate2; + } else { + single_rate = rate2; + hybrid_rate = rate2 + compmode_cost; + } + + single_rd = RDCOST(x->rdmult, single_rate, distortion2); + hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2); + + if (!comp_pred) { + if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE]) + search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd; + } else { + if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE]) + search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd; + } + if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT]) + search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; + } + if (sf->drop_ref && second_ref_frame == NONE_FRAME) { + // Collect data from single ref mode, and analyze data. + sf_drop_ref_analyze(&search_state, mode_order, distortion2); + } + + if (x->skip && !comp_pred) break; + } + + release_compound_type_rd_buffers(&rd_buffers); + + if (!do_tx_search) { + inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr); + search_state.best_rd = INT64_MAX; + + int64_t top_est_rd = + inter_modes_info->num > 0 + ? 
inter_modes_info + ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx] + : INT64_MAX; + for (int j = 0; j < inter_modes_info->num; ++j) { + const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx; + *mbmi = inter_modes_info->mbmi_arr[data_idx]; + int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx]; + if (curr_est_rd * 0.80 > top_est_rd) break; + + const int mode_rate = inter_modes_info->mode_rate_arr[data_idx]; + + x->skip = 0; + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + // Select prediction reference frames. + const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; + for (i = 0; i < num_planes; i++) { + xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; + if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; + } + + RD_STATS rd_stats; + RD_STATS rd_stats_y; + RD_STATS rd_stats_uv; + + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, + av1_num_planes(cm) - 1); + if (mbmi->motion_mode == OBMC_CAUSAL) + av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); + + if (!txfm_search(cpi, tile_data, x, bsize, mi_row, mi_col, &rd_stats, + &rd_stats_y, &rd_stats_uv, mode_rate, + search_state.best_rd)) { + continue; + } else if (cpi->sf.inter_mode_rd_model_estimation == 1) { + const int skip_ctx = av1_get_skip_context(xd); + inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse, + rd_stats.dist, + rd_stats_y.rate + rd_stats_uv.rate + + x->skip_cost[skip_ctx][mbmi->skip]); + } + rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist); + + if (rd_stats.rdcost < search_state.best_rd) { + search_state.best_rd = rd_stats.rdcost; + // Note index of best mode so far + const int mode_index = get_prediction_mode_idx( + mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]); + search_state.best_mode_index = mode_index; + *rd_cost = rd_stats; + search_state.best_rd = rd_stats.rdcost; + search_state.best_mbmode = *mbmi; + search_state.best_skip2 = mbmi->skip; + 
search_state.best_mode_skippable = rd_stats.skip; + search_state.best_rate_y = + rd_stats_y.rate + + x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip]; + search_state.best_rate_uv = rd_stats_uv.rate; + memcpy(ctx->blk_skip, x->blk_skip, + sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); +#if CONFIG_ONE_PASS_SVM + av1_copy_reg_stat(rd_cost, &rd_stats_y); +#endif + } + } + } + + for (int j = 0; j < intra_mode_num; ++j) { + const int mode_index = intra_mode_idx_ls[j]; + const MV_REFERENCE_FRAME ref_frame = + av1_mode_order[mode_index].ref_frame[0]; + assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME); + assert(ref_frame == INTRA_FRAME); + if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break; + init_mbmi(mbmi, mode_index, cm); + x->skip = 0; + set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME); + + // Select prediction reference frames. + for (i = 0; i < num_planes; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + } + + RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv; + + const int ref_frame_cost = ref_costs_single[ref_frame]; + intra_rd_stats.rdcost = handle_intra_mode( + &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0, + &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv); + if (intra_rd_stats.rdcost < search_state.best_rd) { + search_state.best_rd = intra_rd_stats.rdcost; + // Note index of best mode so far + search_state.best_mode_index = mode_index; + *rd_cost = intra_rd_stats; + search_state.best_rd = intra_rd_stats.rdcost; + search_state.best_mbmode = *mbmi; + search_state.best_skip2 = 0; + search_state.best_mode_skippable = intra_rd_stats.skip; + search_state.best_rate_y = + intra_rd_stats_y.rate + + x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip]; + search_state.best_rate_uv = intra_rd_stats_uv.rate; + memcpy(ctx->blk_skip, x->blk_skip, + sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); +#if CONFIG_ONE_PASS_SVM + av1_copy_reg_stat(rd_cost, &intra_rd_stats_y); +#endif + } 
+ } + + search_state.best_mbmode.skip_mode = 0; + if (cm->current_frame.skip_mode_info.skip_mode_flag && + !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + is_comp_ref_allowed(bsize)) { + rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col, + yv12_mb); + } + + // Make sure that the ref_mv_idx is only nonzero when we're + // using a mode which can support ref_mv_idx + if (search_state.best_mbmode.ref_mv_idx != 0 && + !(search_state.best_mbmode.mode == NEWMV || + search_state.best_mbmode.mode == NEW_NEWMV || + have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) { + search_state.best_mbmode.ref_mv_idx = 0; + } + + if (search_state.best_mode_index < 0 || + search_state.best_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; + rd_cost->rdcost = INT64_MAX; + return; + } + + assert( + (cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == + av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) || + !is_inter_block(&search_state.best_mbmode)); + assert( + (cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == + av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) || + !is_inter_block(&search_state.best_mbmode)); + + if (!cpi->rc.is_src_frame_alt_ref) + av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, + sf->adaptive_rd_thresh, bsize, + search_state.best_mode_index); + + // macroblock modes + *mbmi = search_state.best_mbmode; + x->skip |= search_state.best_skip2; + + // Note: this section is needed since the mode may have been forced to + // GLOBALMV by the all-zero mode handling of ref-mv. 
+ if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) { + // Correct the interp filters for GLOBALMV + if (is_nontrans_global_motion(xd, xd->mi[0])) { + assert(mbmi->interp_filters == + av1_broadcast_interp_filter( + av1_unswitchable_filter(cm->interp_filter))); + } + } + + for (i = 0; i < REFERENCE_MODES; ++i) { + if (search_state.best_pred_rd[i] == INT64_MAX) + search_state.best_pred_diff[i] = INT_MIN; + else + search_state.best_pred_diff[i] = + search_state.best_rd - search_state.best_pred_rd[i]; + } + + x->skip |= search_state.best_mode_skippable; + + assert(search_state.best_mode_index >= 0); + + store_coding_context(x, ctx, search_state.best_mode_index, + search_state.best_pred_diff, + search_state.best_mode_skippable); +} + void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col,

@@ -123,6 +123,13 @@ struct RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); +void av1_nonrd_pick_inter_mode_sb(struct AV1_COMP *cpi, + struct TileDataEnc *tile_data, + struct macroblock *x, int mi_row, int mi_col, + struct RD_STATS *rd_cost, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far); + void av1_rd_pick_inter_mode_sb_seg_skip( const struct AV1_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost,

@@ -237,6 +237,7 @@ sf->disable_wedge_search_edge_thresh = 0; sf->prune_motion_mode_level = 1; sf->cb_pred_filter_search = 0; + sf->use_nonrd_pick_mode = 0; if (speed >= 1) { sf->gm_erroradv_type = GM_ERRORADV_TR_1; @@ -407,6 +408,211 @@ } } +// TODO(kyslov): now this is very similar to +// set_good_speed_features_framesize_independent +// except it sets non-rd flag on speed8. This function will likely +// be modified in the future with RT-specific speed features +static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, + SPEED_FEATURES *sf, + int speed) { + AV1_COMMON *const cm = &cpi->common; + const int boosted = frame_is_boosted(cpi); + + // Speed 0 for all speed features that give neutral coding performance change. + sf->reduce_inter_modes = 1; + sf->prune_ext_partition_types_search_level = 1; + sf->ml_prune_rect_partition = 1; + sf->ml_prune_ab_partition = 1; + sf->ml_prune_4_partition = 1; + sf->adaptive_txb_search_level = 1; + sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH; + sf->model_based_prune_tx_search_level = 1; + sf->model_based_post_interp_filter_breakout = 1; + sf->model_based_motion_mode_rd_breakout = 1; + + // TODO(debargha): Test, tweak and turn on either 1 or 2 + sf->inter_mode_rd_model_estimation = 0; + + sf->prune_ref_frame_for_rect_partitions = + !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame); + sf->prune_ref_mode_for_partitions = sf->prune_ref_frame_for_rect_partitions; + sf->less_rectangular_check_level = 1; + sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3; + sf->gm_disable_recode = 1; + sf->use_fast_interpolation_filter_search = 1; + sf->intra_tx_size_search_init_depth_sqr = 1; + sf->intra_angle_estimation = 1; + sf->selective_ref_frame = 1; + sf->prune_wedge_pred_diff_based = 1; + sf->disable_wedge_search_var_thresh = 0; + sf->disable_wedge_search_edge_thresh = 0; + sf->prune_motion_mode_level = 1; + sf->cb_pred_filter_search = 0; + sf->use_nonrd_pick_mode = 0; + + if (speed >= 1) 
{ + sf->gm_erroradv_type = GM_ERRORADV_TR_1; + sf->selective_ref_frame = 2; + + sf->inter_tx_size_search_init_depth_rect = 1; + sf->inter_tx_size_search_init_depth_sqr = 1; + sf->intra_tx_size_search_init_depth_rect = 1; + sf->tx_size_search_lgr_block = 1; + sf->prune_ext_partition_types_search_level = 2; + sf->skip_repeat_interpolation_filter_search = 1; + sf->tx_type_search.skip_tx_search = 1; + sf->tx_type_search.ml_tx_split_thresh = 40; + sf->model_based_prune_tx_search_level = 0; + sf->adaptive_txb_search_level = 2; + sf->use_intra_txb_hash = 1; + sf->optimize_b_precheck = 1; + sf->dual_sgr_penalty_level = 1; + sf->use_accurate_subpel_search = USE_4_TAPS; + sf->reuse_inter_intra_mode = 1; + sf->prune_comp_search_by_single_result = 1; + sf->skip_repeated_newmv = 1; + sf->obmc_full_pixel_search_level = 1; + // TODO(anyone): Following speed feature will be further explored to + // identify the appropriate tradeoff between encoder performance and its + // speed. + sf->prune_single_motion_modes_by_simple_trans = 1; + + sf->simple_motion_search_prune_rect = 1; + + sf->disable_wedge_search_var_thresh = 0; + sf->disable_wedge_search_edge_thresh = 0; + sf->prune_comp_type_by_comp_avg = 1; + sf->prune_motion_mode_level = 2; + sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2; + sf->cb_pred_filter_search = 1; + sf->use_transform_domain_distortion = boosted ? 0 : 1; + } + + if (speed >= 2) { + sf->gm_erroradv_type = GM_ERRORADV_TR_2; + + sf->selective_ref_frame = 3; + sf->fast_cdef_search = 1; + + sf->adaptive_rd_thresh = 1; + sf->mv.auto_mv_step_size = 1; + sf->mv.subpel_iters_per_step = 1; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; + + sf->partition_search_breakout_rate_thr = 80; + // Note: This speed feature is disable as it seems to be worse in + // compression/quality and is also slower. 
+ // sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->allow_partition_search_skip = 1; + sf->disable_wedge_search_var_thresh = 100; + sf->disable_wedge_search_edge_thresh = 0; + sf->fast_wedge_sign_estimate = 1; + sf->disable_dual_filter = 1; + sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED; + sf->prune_comp_type_by_comp_avg = 2; + sf->cb_pred_filter_search = 0; + sf->adaptive_interp_filter_search = 1; + } + + if (speed >= 3) { + sf->selective_ref_frame = 4; + sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL; + sf->less_rectangular_check_level = 2; + sf->adaptive_pred_interp_filter = 1; + // adaptive_motion_search breaks encoder multi-thread tests. + // The values in x->pred_mv[] differ for single and multi-thread cases. + // See aomedia:1778. + // sf->adaptive_motion_search = 1; + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->use_transform_domain_distortion = 1; + sf->use_accurate_subpel_search = USE_2_TAPS; + sf->adaptive_rd_thresh = 2; + sf->tx_type_search.prune_mode = PRUNE_2D_FAST; + sf->gm_search_type = GM_DISABLE_SEARCH; + sf->prune_comp_search_by_single_result = 2; + sf->prune_motion_mode_level = boosted ? 2 : 3; + sf->prune_warp_using_wmtype = 1; + // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine + // it with cpi->sf.disable_wedge_search_var_thresh. + sf->disable_wedge_interintra_search = 1; + } + + if (speed >= 4) { + sf->use_intra_txb_hash = 0; + sf->use_mb_rd_hash = 0; + sf->tx_type_search.fast_intra_tx_type_search = 1; + sf->tx_type_search.fast_inter_tx_type_search = 1; + sf->use_square_partition_only_threshold = + boosted ? BLOCK_128X128 : BLOCK_4X4; + sf->tx_size_search_method = + frame_is_intra_only(cm) ? 
USE_FULL_RD : USE_LARGESTALL; + sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; + sf->adaptive_pred_interp_filter = 0; + sf->adaptive_mode_search = 1; + sf->cb_partition_search = !boosted; + sf->alt_ref_search_fp = 1; + sf->skip_sharp_interp_filter_search = 1; + } + + if (speed >= 5) { + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; + sf->use_square_partition_only_threshold = BLOCK_4X4; + sf->tx_size_search_method = USE_LARGESTALL; + sf->mv.search_method = BIGDIA; + sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; + sf->adaptive_rd_thresh = 4; + sf->mode_search_skip_flags = + (cm->current_frame.frame_type == KEY_FRAME) + ? 0 + : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; + sf->disable_filter_search_var_thresh = 200; + sf->use_fast_coef_costing = 1; + sf->partition_search_breakout_rate_thr = 300; + sf->use_transform_domain_distortion = 2; + } + + if (speed >= 6) { + int i; + sf->optimize_coefficients = NO_TRELLIS_OPT; + sf->mv.search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; ++i) { + sf->intra_y_mode_mask[i] = INTRA_DC; + sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL; + } + sf->partition_search_breakout_rate_thr = 500; + sf->mv.reduce_first_step_size = 1; + sf->simple_model_rd_from_var = 1; + } + if (speed >= 7) { + sf->default_max_partition_size = BLOCK_32X32; + sf->default_min_partition_size = BLOCK_8X8; + sf->intra_y_mode_mask[TX_64X64] = INTRA_DC; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; + sf->frame_parameter_update = 0; + sf->mv.search_method = FAST_HEX; + sf->partition_search_type = 
REFERENCE_PARTITION; + sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; + } + if (speed >= 8) { + sf->mv.search_method = FAST_DIAMOND; + sf->lpf_pick = LPF_PICK_MINIMAL_LPF; + sf->default_max_partition_size = BLOCK_128X128; + sf->default_min_partition_size = BLOCK_8X8; + sf->partition_search_type = VAR_BASED_PARTITION; + sf->use_nonrd_pick_mode = 1; + } +} + void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) { SPEED_FEATURES *const sf = &cpi->sf; const AV1EncoderConfig *const oxcf = &cpi->oxcf; @@ -570,12 +776,13 @@ sf->disable_interinter_wedge_newmv_search = 0; sf->prune_motion_mode_level = 0; sf->prune_warp_using_wmtype = 0; - sf->disable_wedge_interintra_search = 0; sf->perform_coeff_opt = 0; if (oxcf->mode == GOOD) set_good_speed_features_framesize_independent(cpi, sf, speed); + else if (oxcf->mode == REALTIME) + set_rt_speed_features_framesize_independent(cpi, sf, speed); if (!cpi->seq_params_locked) { cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;

@@ -191,7 +191,9 @@ // Always use a fixed size partition FIXED_PARTITION, - REFERENCE_PARTITION + REFERENCE_PARTITION, + + VAR_BASED_PARTITION } UENUM1BYTE(PARTITION_SEARCH_TYPE); enum { @@ -652,6 +654,9 @@ // Flag used to control the extent of coeff R-D optimization int perform_coeff_opt; + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; } SPEED_FEATURES; struct AV1_COMP;

diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c new file mode 100644 index 0000000..7f19344 --- /dev/null +++ b/av1/encoder/var_based_part.c

@@ -0,0 +1,778 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <limits.h> +#include <math.h> +#include <stdbool.h> +#include <stdio.h> + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom_dsp/aom_dsp_common.h" +#include "aom_dsp/binary_codes_writer.h" +#include "aom_ports/mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/system_state.h" + +#include "av1/common/reconinter.h" +#include "av1/common/blockd.h" + +#include "av1/encoder/encodeframe.h" +#include "av1/encoder/var_based_part.h" +#include "av1/encoder/reconinter_enc.h" + +extern const uint8_t AV1_VAR_OFFS[]; + +typedef struct { + // TODO(kyslov): consider changing to 64bit + + // This struct is used for computing variance in choose_partitioning(), where + // the max number of samples within a superblock is 32x32 (with 4x4 avg). + // With 8bit bitdepth, uint32_t is enough for sum_square_error (2^8 * 2^8 * 32 + // * 32 = 2^26). 
For high bitdepth we need to consider changing this to 64 bit + uint32_t sum_square_error; + int32_t sum_error; + int log2_count; + int variance; +} var; + +typedef struct { + var none; + var horz[2]; + var vert[2]; +} partition_variance; + +typedef struct { + partition_variance part_variances; + var split[4]; +} v4x4; + +typedef struct { + partition_variance part_variances; + v4x4 split[4]; +} v8x8; + +typedef struct { + partition_variance part_variances; + v8x8 split[4]; +} v16x16; + +typedef struct { + partition_variance part_variances; + v16x16 split[4]; +} v32x32; + +typedef struct { + partition_variance part_variances; + v32x32 split[4]; +} v64x64; + +typedef struct { + partition_variance part_variances; + v64x64 split[4]; +} v128x128; + +typedef struct { + partition_variance *part_variances; + var *split[4]; +} variance_node; + +static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { + int i; + node->part_variances = NULL; + switch (bsize) { + case BLOCK_128X128: { + v128x128 *vt = (v128x128 *)data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_64X64: { + v64x64 *vt = (v64x64 *)data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_32X32: { + v32x32 *vt = (v32x32 *)data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_16X16: { + v16x16 *vt = (v16x16 *)data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_8X8: { + v8x8 *vt = (v8x8 *)data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + default: { + v4x4 *vt = (v4x4 *)data; + assert(bsize 
== BLOCK_4X4); + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; + break; + } + } +} + +// Set variance values given sum square error, sum error, count. +static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->log2_count = c; +} + +static void get_variance(var *v) { + v->variance = + (int)(256 * (v->sum_square_error - + (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> + v->log2_count)) >> + v->log2_count); +} + +static void sum_2_variances(const var *a, const var *b, var *r) { + assert(a->log2_count == b->log2_count); + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->log2_count + 1, r); +} + +static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { + variance_node node; + memset(&node, 0, sizeof(node)); + tree_to_node(data, bsize, &node); + sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); + sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); + sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); + sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); + sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], + &node.part_variances->none); +} + +static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x, + MACROBLOCKD *const xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + xd->mi[0]->sb_type = bsize; + } +} + +static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x, + MACROBLOCKD *const xd, void *data, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int64_t threshold, BLOCK_SIZE bsize_min, + int force_split) { + AV1_COMMON *const cm = &cpi->common; + variance_node vt; + const int block_width = mi_size_wide[bsize]; + const int 
block_height = mi_size_high[bsize]; + + assert(block_height == block_width); + tree_to_node(data, bsize, &vt); + + if (force_split == 1) return 0; + + // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if + // variance is below threshold, otherwise split will be selected. + // No check for vert/horiz split as too few samples for variance. + if (bsize == bsize_min) { + // Variance already computed to set the force_split. + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); + if (mi_col + block_width / 2 < cm->mi_cols && + mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, x, xd, mi_row, mi_col, bsize); + return 1; + } + return 0; + } else if (bsize > bsize_min) { + // Variance already computed to set the force_split. + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); + // For key frame: take split for bsize above 32X32 or very high variance. + if (frame_is_intra_only(cm) && + (bsize > BLOCK_32X32 || + vt.part_variances->none.variance > (threshold << 4))) { + return 0; + } + // If variance is low, take the bsize (no split). + if (mi_col + block_width / 2 < cm->mi_cols && + mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, x, xd, mi_row, mi_col, bsize); + return 1; + } + + // Check vertical split. 
+ if (mi_row + block_height / 2 < cm->mi_rows) { + BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); + get_variance(&vt.part_variances->vert[0]); + get_variance(&vt.part_variances->vert[1]); + if (vt.part_variances->vert[0].variance < threshold && + vt.part_variances->vert[1].variance < threshold && + get_plane_block_size(subsize, xd->plane[1].subsampling_x, + xd->plane[1].subsampling_y) < BLOCK_INVALID) { + set_block_size(cpi, x, xd, mi_row, mi_col, subsize); + set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); + return 1; + } + } + // Check horizontal split. + if (mi_col + block_width / 2 < cm->mi_cols) { + BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); + get_variance(&vt.part_variances->horz[0]); + get_variance(&vt.part_variances->horz[1]); + if (vt.part_variances->horz[0].variance < threshold && + vt.part_variances->horz[1].variance < threshold && + get_plane_block_size(subsize, xd->plane[1].subsampling_x, + xd->plane[1].subsampling_y) < BLOCK_INVALID) { + set_block_size(cpi, x, xd, mi_row, mi_col, subsize); + set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); + return 1; + } + } + + return 0; + } + return 0; +} + +static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x16_idx, int y16_idx, v16x16 *vst, + int pixels_wide, int pixels_high, + int is_key_frame) { + int k; + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + unsigned int sse = 0; + int sum = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + int s_avg; + int d_avg = 128; + s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp); + if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp); + + sum = s_avg - d_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + } +} + +static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x16_idx, int 
y16_idx, int pixels_wide, + int pixels_high) { + int k; + int minmax_max = 0; + int minmax_min = 255; + // Loop over the 4 8x8 subblocks. + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + int min = 0; + int max = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, + &min, &max); + if ((max - min) > minmax_max) minmax_max = (max - min); + if ((max - min) < minmax_min) minmax_min = (max - min); + } + } + return (minmax_max - minmax_min); +} + +static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, + int dp, int x8_idx, int y8_idx, v8x8 *vst, + int pixels_wide, int pixels_high, + int is_key_frame) { + int k; + for (k = 0; k < 4; k++) { + int x4_idx = x8_idx + ((k & 1) << 2); + int y4_idx = y8_idx + ((k >> 1) << 2); + unsigned int sse = 0; + int sum = 0; + if (x4_idx < pixels_wide && y4_idx < pixels_high) { + int s_avg; + int d_avg = 128; + s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp); + sum = s_avg - d_avg; + sse = sum * sum; + } + fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + } +} + +static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, + int width, int height, + int content_state) { + if (speed >= 8) { + if (width <= 640 && height <= 480) + return (5 * threshold_base) >> 2; + else if ((content_state == kLowSadLowSumdiff) || + (content_state == kHighSadLowSumdiff) || + (content_state == kLowVarHighSumdiff)) + return (5 * threshold_base) >> 2; + } else if (speed == 7) { + if ((content_state == kLowSadLowSumdiff) || + (content_state == kHighSadLowSumdiff) || + (content_state == kLowVarHighSumdiff)) { + return (5 * threshold_base) >> 2; + } + } + return threshold_base; +} + +// Set the variance split thresholds for following the block sizes: +// 0 - threshold_128x128, 1 - threshold_64x64, 
2 - threshold_32x32, +// 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition) is +// currently only used on key frame. +static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q, + int content_state) { + AV1_COMMON *const cm = &cpi->common; + const int is_key_frame = frame_is_intra_only(cm); + const int threshold_multiplier = is_key_frame ? 40 : 1; + int64_t threshold_base = + (int64_t)(threshold_multiplier * cpi->dequants.y_dequant_QTX[q][1]); + + if (is_key_frame) { + thresholds[0] = threshold_base; + thresholds[1] = threshold_base; + thresholds[2] = threshold_base >> 2; + thresholds[3] = threshold_base >> 2; + thresholds[4] = threshold_base << 2; + } else { + // Increase base variance threshold based on content_state/sum_diff level. + threshold_base = scale_part_thresh_sumdiff( + threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); + + thresholds[1] = threshold_base; + thresholds[3] = threshold_base << cpi->oxcf.speed; + if (cm->width >= 1280 && cm->height >= 720) + thresholds[3] = thresholds[3] << 1; + if (cm->width <= 352 && cm->height <= 288) { + thresholds[1] = threshold_base >> 3; + thresholds[2] = threshold_base >> 1; + thresholds[3] = threshold_base << 3; + } else if (cm->width < 1280 && cm->height < 720) { + thresholds[2] = (5 * threshold_base) >> 2; + } else if (cm->width < 1920 && cm->height < 1080) { + thresholds[2] = threshold_base << 1; + thresholds[3] <<= 2; + } else { + thresholds[2] = (5 * threshold_base) >> 1; + } + } +} + +void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q, + int content_state) { + AV1_COMMON *const cm = &cpi->common; + SPEED_FEATURES *const sf = &cpi->sf; + const int is_key_frame = frame_is_intra_only(cm); + if (sf->partition_search_type != VAR_BASED_PARTITION) { + return; + } else { + set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); + // The thresholds below are not changed locally. 
+ if (is_key_frame) { + cpi->vbp_threshold_sad = 0; + cpi->vbp_threshold_copy = 0; + cpi->vbp_bsize_min = BLOCK_8X8; + } else { + if (cm->width <= 352 && cm->height <= 288) + cpi->vbp_threshold_sad = 10; + else + cpi->vbp_threshold_sad = (cpi->dequants.y_dequant_QTX[q][1] << 1) > 1000 + ? (cpi->dequants.y_dequant_QTX[q][1] << 1) + : 1000; + cpi->vbp_bsize_min = BLOCK_16X16; + if (cm->width <= 352 && cm->height <= 288) + cpi->vbp_threshold_copy = 4000; + else if (cm->width <= 640 && cm->height <= 360) + cpi->vbp_threshold_copy = 8000; + else + cpi->vbp_threshold_copy = + (cpi->dequants.y_dequant_QTX[q][1] << 3) > 8000 + ? (cpi->dequants.y_dequant_QTX[q][1] << 3) + : 8000; + } + cpi->vbp_threshold_minmax = 15 + (q >> 3); + } +} + +// This function chooses partitioning based on the variance between source and +// reconstructed last, where variance is computed for down-sampled inputs. +// TODO(kyslov): lot of things. Bring back noise estimation, brush up partition +// selection +// and most of all - retune the thresholds +int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile, + MACROBLOCK *x, int mi_row, int mi_col) { + AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + + int i, j, k, m; + v128x128 *vt; + v16x16 *vt2 = NULL; + unsigned char force_split[85]; + int avg_32x32; + int max_var_32x32 = 0; + int min_var_32x32 = INT_MAX; + int var_32x32; + int var_64x64; + int min_var_64x64 = INT_MAX; + int max_var_64x64 = 0; + int avg_16x16[4]; + int maxvar_16x16[4]; + int minvar_16x16[4]; + int64_t threshold_4x4avg; + int content_state = 0; + uint8_t *s; + const uint8_t *d; + int sp; + int dp; + int compute_minmax_variance = 1; + int is_key_frame = frame_is_intra_only(cm); + int pixels_wide = 128, pixels_high = 128; + + CHECK_MEM_ERROR(cm, vt, aom_calloc(1, sizeof(*vt))); + + int64_t thresholds[5] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], + cpi->vbp_thresholds[2], cpi->vbp_thresholds[3], + cpi->vbp_thresholds[4] }; + + 
const int low_res = (cm->width <= 352 && cm->height <= 288); + int variance4x4downsample[64]; + int segment_id; + const int num_planes = av1_num_planes(cm); + + segment_id = xd->mi[0]->segment_id; + + set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); + + // For non keyframes, disable 4x4 average for low resolution when speed = 8 + threshold_4x4avg = INT64_MAX; + + if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); + if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); + + s = x->plane[0].src.buf; + sp = x->plane[0].src.stride; + + // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, + // 5-20 for the 16x16 blocks. + force_split[0] = 0; + + if (!is_key_frame) { + // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it + // is!! + MB_MODE_INFO *mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); + + assert(yv12 != NULL); + + av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + get_ref_scale_factors(cm, LAST_FRAME), num_planes); + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE_FRAME; + mi->sb_type = BLOCK_128X128; + mi->mv[0].as_int = 0; + mi->interp_filters = av1_make_interp_filters(BILINEAR, BILINEAR); + +// TODO(kyslov): bring the small SAD functionality back +#if 0 + y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); +#endif + x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, BLOCK_128X128, + AOM_PLANE_Y, AOM_PLANE_Y); + + d = xd->plane[0].dst.buf; + dp = xd->plane[0].dst.stride; + + // If the y_sad is very small, take 64x64 as partition and exit. + // Don't check on boosted segment for now, as 64x64 is suppressed there. 
+#if 0 + if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) + { const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const + int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; if (mi_col + + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows) + { set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_128X128); + x->variance_low[0] = 1; + return 0; + } + } +#endif + } else { + d = AV1_VAR_OFFS; + dp = 0; + } + + if (low_res && threshold_4x4avg < INT64_MAX) + CHECK_MEM_ERROR(cm, vt2, aom_calloc(64, sizeof(*vt2))); + // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances + // for splits. + for (m = 0; m < 4; m++) { + const int x64_idx = ((m & 1) << 6); + const int y64_idx = ((m >> 1) << 6); + const int m2 = m << 2; + force_split[m + 1] = 0; + for (i = 0; i < 4; i++) { + const int x32_idx = x64_idx + ((i & 1) << 5); + const int y32_idx = y64_idx + ((i >> 1) << 5); + const int i2 = (m2 + i) << 2; + force_split[5 + m2 + i] = 0; + avg_16x16[i] = 0; + maxvar_16x16[i] = 0; + minvar_16x16[i] = INT_MAX; + for (j = 0; j < 4; j++) { + const int x16_idx = x32_idx + ((j & 1) << 4); + const int y16_idx = y32_idx + ((j >> 1) << 4); + const int split_index = 21 + i2 + j; + v16x16 *vst = &vt->split[m].split[i].split[j]; + force_split[split_index] = 0; + variance4x4downsample[i2 + j] = 0; + if (!is_key_frame) { + fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide, + pixels_high, is_key_frame); + fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16); + get_variance(&vt->split[m].split[i].split[j].part_variances.none); + avg_16x16[i] += + vt->split[m].split[i].split[j].part_variances.none.variance; + if (vt->split[m].split[i].split[j].part_variances.none.variance < + minvar_16x16[i]) + minvar_16x16[i] = + vt->split[m].split[i].split[j].part_variances.none.variance; + if (vt->split[m].split[i].split[j].part_variances.none.variance > + maxvar_16x16[i]) + maxvar_16x16[i] = + 
vt->split[m].split[i].split[j].part_variances.none.variance; + if (vt->split[m].split[i].split[j].part_variances.none.variance > + thresholds[3]) { + // 16X16 variance is above threshold for split, so force split to + // 8x8 for this 16x16 block (this also forces splits for upper + // levels). + force_split[split_index] = 1; + force_split[5 + m2 + i] = 1; + force_split[m + 1] = 1; + force_split[0] = 1; + } else if (compute_minmax_variance && + vt->split[m] + .split[i] + .split[j] + .part_variances.none.variance > thresholds[2] && + !cyclic_refresh_segment_id_boosted(segment_id)) { + // We have some nominal amount of 16x16 variance (based on average), + // compute the minmax over the 8x8 sub-blocks, and if above + // threshold, force split to 8x8 block for this 16x16 block. + int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, + pixels_wide, pixels_high); + int thresh_minmax = (int)cpi->vbp_threshold_minmax; + if (minmax > thresh_minmax) { + force_split[split_index] = 1; + force_split[5 + m2 + i] = 1; + force_split[m + 1] = 1; + force_split[0] = 1; + } + } + } + if (is_key_frame) { + force_split[split_index] = 0; + // Go down to 4x4 down-sampling for variance. + variance4x4downsample[i2 + j] = 1; + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; + fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, + pixels_wide, pixels_high, is_key_frame); + } + } + } + } + } + + // Fill the rest of the variance tree by summing split partition values. + for (m = 0; m < 4; ++m) { + avg_32x32 = 0; + const int m2 = m << 2; + for (i = 0; i < 4; i++) { + const int i2 = (m2 + i) << 2; + for (j = 0; j < 4; j++) { + const int split_index = 21 + i2 + j; + if (variance4x4downsample[i2 + j] == 1) { + v16x16 *vtemp = + (!is_key_frame) ? 
&vt2[i2 + j] : &vt->split[m].split[i].split[j]; + for (k = 0; k < 4; k++) + fill_variance_tree(&vtemp->split[k], BLOCK_8X8); + fill_variance_tree(vtemp, BLOCK_16X16); + // If variance of this 16x16 block is above the threshold, force block + // to split. This also forces a split on the upper levels. + get_variance(&vtemp->part_variances.none); + if (vtemp->part_variances.none.variance > thresholds[3]) { + force_split[split_index] = 1; + force_split[5 + m2 + i] = 1; + force_split[m + 1] = 1; + force_split[0] = 1; + } + } + } + fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32); + // If variance of this 32x32 block is above the threshold, or if its above + // (some threshold of) the average variance over the sub-16x16 blocks, + // then force this block to split. This also forces a split on the upper + // (64x64) level. + if (!force_split[5 + m2 + i]) { + get_variance(&vt->split[m].split[i].part_variances.none); + var_32x32 = vt->split[m].split[i].part_variances.none.variance; + max_var_32x32 = AOMMAX(var_32x32, max_var_32x32); + min_var_32x32 = AOMMIN(var_32x32, min_var_32x32); + if (vt->split[m].split[i].part_variances.none.variance > + thresholds[2] || + (!is_key_frame && + vt->split[m].split[i].part_variances.none.variance > + (thresholds[2] >> 1) && + vt->split[m].split[i].part_variances.none.variance > + (avg_16x16[i] >> 1))) { + force_split[5 + m2 + i] = 1; + force_split[m + 1] = 1; + force_split[0] = 1; + } else if (!is_key_frame && cm->height <= 360 && + (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) && + maxvar_16x16[i] > thresholds[2]) { + force_split[5 + m2 + i] = 1; + force_split[m + 1] = 1; + force_split[0] = 1; + } + avg_32x32 += var_32x32; + } + } + if (!force_split[1 + m]) { + fill_variance_tree(&vt->split[m], BLOCK_64X64); + get_variance(&vt->split[m].part_variances.none); + var_64x64 = vt->split[m].part_variances.none.variance; + max_var_64x64 = AOMMAX(var_64x64, max_var_64x64); + min_var_64x64 = AOMMIN(var_64x64, min_var_64x64); + 
// If variance of this 64x64 block is above (some threshold of) the + // average variance over the sub-32x32 blocks, then force this block to + // split. Only checking this for noise level >= medium for now. + + if (!is_key_frame && + (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) && + max_var_32x32 > thresholds[1] >> 1) + force_split[1 + m] = 1; + } + } + + if (!force_split[0]) { + fill_variance_tree(vt, BLOCK_128X128); + get_variance(&vt->part_variances.none); + if (!is_key_frame && + (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) && + max_var_64x64 > thresholds[0] >> 1) + force_split[0] = 1; + } + + const int mi_rows_remaining = tile->mi_row_end - mi_row; + const int mi_cols_remaining = tile->mi_col_end - mi_col; + if (!((mi_cols_remaining >= cm->seq_params.mib_size) && + (mi_rows_remaining >= cm->seq_params.mib_size))) { + int bh = mi_size_high[BLOCK_128X128]; + int bw = mi_size_wide[BLOCK_128X128]; + + int r, c; + for (r = 0; r < cm->seq_params.mib_size; r += bh) { + bw = mi_size_wide[BLOCK_128X128]; + for (c = 0; c < cm->seq_params.mib_size; c += bw) { + BLOCK_SIZE sb_type = + find_partition_size(BLOCK_128X128, mi_rows_remaining - r, + mi_cols_remaining - c, &bh, &bw); + set_block_size(cpi, x, xd, mi_row + r, mi_col + c, sb_type); + } + } + + } else { + if (!set_vt_partitioning(cpi, x, xd, vt, BLOCK_128X128, mi_row, mi_col, + thresholds[0], BLOCK_16X16, force_split[0])) { + for (m = 0; m < 4; ++m) { + const int x64_idx = ((m & 1) << 4); + const int y64_idx = ((m >> 1) << 4); + const int m2 = m << 2; + + // Now go through the entire structure, splitting every block size until + // we get to one that's got a variance lower than our threshold. 
+ if (!set_vt_partitioning(cpi, x, xd, &vt->split[m], BLOCK_64X64, + mi_row + y64_idx, mi_col + x64_idx, + thresholds[1], BLOCK_16X16, + force_split[1 + m])) { + for (i = 0; i < 4; ++i) { + const int x32_idx = ((i & 1) << 3); + const int y32_idx = ((i >> 1) << 3); + const int i2 = (m2 + i) << 2; + if (!set_vt_partitioning( + cpi, x, xd, &vt->split[m].split[i], BLOCK_32X32, + (mi_row + y64_idx + y32_idx), (mi_col + x64_idx + x32_idx), + thresholds[2], BLOCK_16X16, force_split[5 + m2 + i])) { + for (j = 0; j < 4; ++j) { + const int x16_idx = ((j & 1) << 2); + const int y16_idx = ((j >> 1) << 2); + const int split_index = 21 + i2 + j; + // For inter frames: if variance4x4downsample[] == 1 for this + // 16x16 block, then the variance is based on 4x4 down-sampling, + // so use vt2 in set_vt_partitioning(), otherwise use vt. + v16x16 *vtemp = + (!is_key_frame && variance4x4downsample[i2 + j] == 1) + ? &vt2[i2 + j] + : &vt->split[m].split[i].split[j]; + if (!set_vt_partitioning(cpi, x, xd, vtemp, BLOCK_16X16, + mi_row + y64_idx + y32_idx + y16_idx, + mi_col + x64_idx + x32_idx + x16_idx, + thresholds[3], BLOCK_8X8, + force_split[split_index])) { + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1) << 1; + const int y8_idx = (k >> 1) << 1; + set_block_size( + cpi, x, xd, + (mi_row + y64_idx + y32_idx + y16_idx + y8_idx), + (mi_col + x64_idx + x32_idx + x16_idx + x8_idx), + BLOCK_8X8); + } + } + } + } + } + } + } + } + } + + if (vt2) aom_free(vt2); + if (vt) aom_free(vt); + return 0; +}

diff --git a/av1/encoder/var_based_part.h b/av1/encoder/var_based_part.h new file mode 100644 index 0000000..c355224 --- /dev/null +++ b/av1/encoder/var_based_part.h

@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_AV1_ENCODER_VAR_BASED_PART_H_ +#define AOM_AV1_ENCODER_VAR_BASED_PART_H_ + +#include <stdio.h> + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "av1/encoder/encoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q, + int content_state); + +int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile, + MACROBLOCK *x, int mi_row, int mi_col); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // AOM_AV1_ENCODER_VAR_BASED_PART_H_

@@ -50,7 +50,7 @@ EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, kCodecs[i], NULL, 0)); EXPECT_EQ(AOM_CODEC_INVALID_PARAM, - aom_codec_enc_config_default(kCodecs[i], &cfg, 1)); + aom_codec_enc_config_default(kCodecs[i], &cfg, 2)); EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0)); EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0));

diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc new file mode 100644 index 0000000..63b602b --- /dev/null +++ b/test/rt_end_to_end_test.cc

@@ -0,0 +1,141 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +const unsigned int kFrames = 10; +const int kBitrate = 500; + +// List of psnr thresholds for speed settings 0-8 +const double kPsnrThreshold[9] = { 36.9, 36.9, 36.85, 36.8, 36.6, + 36.4, 36.0, 35.5, 34.5 }; + +typedef struct { + const char *filename; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) { + return os << "TestVideoParam { filename:" << test_arg.filename + << " input_bit_depth:" << test_arg.input_bit_depth + << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth + << " profile:" << test_arg.profile << "}"; +} + +// TODO(kyslov): Add more test vectors +const TestVideoParam kTestVectors[] = { + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, +}; + +// Speed settings tested +const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + +class RTEndToEndTest + : public ::libaom_test::CodecTestWith2Params<TestVideoParam, int>, + public ::libaom_test::EncoderTest { + protected: + RTEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), 
+ cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {} + + virtual ~RTEndToEndTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libaom_test::kRealTime); + + cfg_.g_usage = 1; // TODO(kyslov): Move it to encode_test_driver.cc + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + + virtual void BeginPassHook(unsigned int) { + psnr_ = 0.0; + nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 1); + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { return kPsnrThreshold[cpu_used_]; } + + void DoTest() { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + ASSERT_TRUE(video.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()) << "cpu used = " << cpu_used_; + } + + TestVideoParam test_video_param_; + int cpu_used_; + + private: + double psnr_; + unsigned int nframes_; +}; + +class RTEndToEndTestLarge : public RTEndToEndTest {}; + +TEST_P(RTEndToEndTestLarge, 
EndtoEndPSNRTest) { DoTest(); } + +TEST_P(RTEndToEndTest, EndtoEndPSNRTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_CASE(RTEndToEndTestLarge, + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kCpuUsedVectors)); + +AV1_INSTANTIATE_TEST_CASE(RTEndToEndTest, ::testing::V