ffmpeg / libavcodec / vp8.c @ 476be414
History  View  Annotate  Download (59 KB)
1 
/**
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

24  
25 
#include "avcodec.h" 
26 
#include "vp56.h" 
27 
#include "vp8data.h" 
28 
#include "vp8dsp.h" 
29 
#include "h264pred.h" 
30 
#include "rectangle.h" 
31  
32 
/**
 * Loop filter strength values stored per macroblock
 * (VP8Context.filter_strength holds mb_stride of these).
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;
37  
38 
typedef struct { 
39 
uint8_t skip; 
40 
// todo: make it possible to check for at least (i4x4 or split_mv)

41 
// in one op. are others needed?

42 
uint8_t mode; 
43 
uint8_t ref_frame; 
44 
uint8_t partitioning; 
45 
VP56mv mv; 
46 
VP56mv bmv[16];

47 
} VP8Macroblock; 
48  
49 
typedef struct { 
50 
AVCodecContext *avctx; 
51 
DSPContext dsp; 
52 
VP8DSPContext vp8dsp; 
53 
H264PredContext hpc; 
54 
vp8_mc_func put_pixels_tab[3][3][3]; 
55 
AVFrame frames[4];

56 
AVFrame *framep[4];

57 
uint8_t *edge_emu_buffer; 
58 
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors

59 
int profile;

60  
61 
int mb_width; /* number of horizontal MB */ 
62 
int mb_height; /* number of vertical MB */ 
63 
int linesize;

64 
int uvlinesize;

65  
66 
int keyframe;

67 
int invisible;

68 
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one 
69 
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so 
70 
int update_altref;

71 
int deblock_filter;

72  
73 
/**

74 
* If this flag is not set, all the probability updates

75 
* are discarded after this frame is decoded.

76 
*/

77 
int update_probabilities;

78  
79 
/**

80 
* All coefficients are contained in separate arith coding contexts.

81 
* There can be 1, 2, 4, or 8 of these after the header context.

82 
*/

83 
int num_coeff_partitions;

84 
VP56RangeCoder coeff_partition[8];

85  
86 
VP8Macroblock *macroblocks; 
87 
VP8Macroblock *macroblocks_base; 
88 
VP8FilterStrength *filter_strength; 
89 
int mb_stride;

90  
91 
uint8_t *intra4x4_pred_mode_top; 
92 
uint8_t intra4x4_pred_mode_left[4];

93 
uint8_t *segmentation_map; 
94 
int b4_stride;

95  
96 
/**

97 
* Cache of the top row needed for intra prediction

98 
* 16 for luma, 8 for each chroma plane

99 
*/

100 
uint8_t (*top_border)[16+8+8]; 
101  
102 
/**

103 
* For coeff decode, we need to know whether the above block had nonzero

104 
* coefficients. This means for each macroblock, we need data for 4 luma

105 
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9

106 
* per macroblock. We keep the last row in top_nnz.

107 
*/

108 
uint8_t (*top_nnz)[9];

109 
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; 
110  
111 
/**

112 
* This is the index plus one of the last nonzero coeff

113 
* for each of the blocks in the current macroblock.

114 
* So, 0 > no coeffs

115 
* 1 > dconly (special transform)

116 
* 2+> full transform

117 
*/

118 
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; 
119 
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; 
120 
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; 
121 
uint8_t intra4x4_pred_mode_mb[16];

122  
123 
int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock 
124 
int segment; ///< segment of the current macroblock 
125  
126 
int mbskip_enabled;

127 
int sign_bias[4]; ///< one state [0, 1] per ref frame type 
128 
int ref_count[3]; 
129  
130 
/**

131 
* Base parameters for segmentation, i.e. permacroblock parameters.

132 
* These must be kept unchanged even if segmentation is not used for

133 
* a frame, since the values persist between interframes.

134 
*/

135 
struct {

136 
int enabled;

137 
int absolute_vals;

138 
int update_map;

139 
int8_t base_quant[4];

140 
int8_t filter_level[4]; ///< base loop filter level 
141 
} segmentation; 
142  
143 
/**

144 
* Macroblocks can have one of 4 different quants in a frame when

145 
* segmentation is enabled.

146 
* If segmentation is disabled, only the first segment's values are used.

147 
*/

148 
struct {

149 
// [0]  DC qmul [1]  AC qmul

150 
int16_t luma_qmul[2];

151 
int16_t luma_dc_qmul[2]; ///< luma dconly block quant 
152 
int16_t chroma_qmul[2];

153 
} qmat[4];

154  
155 
struct {

156 
int simple;

157 
int level;

158 
int sharpness;

159 
} filter; 
160  
161 
struct {

162 
int enabled; ///< whether each mb can have a different strength based on mode/ref 
163  
164 
/**

165 
* filter strength adjustment for the following macroblock modes:

166 
* [0]  i4x4

167 
* [1]  zero mv

168 
* [2]  inter modes except for zero or split mv

169 
* [3]  split mv

170 
* i16x16 modes never have any adjustment

171 
*/

172 
int8_t mode[4];

173  
174 
/**

175 
* filter strength adjustment for macroblocks that reference:

176 
* [0]  intra / VP56_FRAME_CURRENT

177 
* [1]  VP56_FRAME_PREVIOUS

178 
* [2]  VP56_FRAME_GOLDEN

179 
* [3]  altref / VP56_FRAME_GOLDEN2

180 
*/

181 
int8_t ref[4];

182 
} lf_delta; 
183  
184 
/**

185 
* These are all of the updatable probabilities for binary decisions.

186 
* They are only implictly reset on keyframes, making it quite likely

187 
* for an interframe to desync if a prior frame's header was corrupt

188 
* or missing outright!

189 
*/

190 
struct {

191 
uint8_t segmentid[3];

192 
uint8_t mbskip; 
193 
uint8_t intra; 
194 
uint8_t last; 
195 
uint8_t golden; 
196 
uint8_t pred16x16[4];

197 
uint8_t pred8x8c[3];

198 
/* Padded to allow overreads */

199 
uint8_t token[4][17][3][NUM_DCT_TOKENS1]; 
200 
uint8_t mvc[2][19]; 
201 
} prob[2];

202 
} VP8Context; 
203  
204 
/**
 * Release every held frame buffer and free all per-dimension tables.
 * All freed pointers (and s->macroblocks) are left NULL afterwards.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0])
            avctx->release_buffer(avctx, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    // macroblocks points into macroblocks_base, which was just freed
    s->macroblocks        = NULL;
}
224  
225 
/**
 * Switch the decoder to a new frame size: drop all frames and tables,
 * set the new dimensions on the codec context and reallocate the tables.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if avcodec_check_dimensions()
 *         rejects the size, AVERROR(ENOMEM) if any allocation fails
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    if (avcodec_check_dimensions(s->avctx, width, height))
        return AVERROR_INVALIDDATA;

    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    // we allocate a border around the top/left of intra4x4 modes
    // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
    s->mb_stride = s->mb_width+1;
    s->b4_stride = 4*s->mb_stride;

    s->macroblocks_base        = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
    s->filter_strength         = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top  = av_mallocz(s->b4_stride*4);
    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map        = av_mallocz(s->mb_stride*s->mb_height);

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border || !s->segmentation_map) {
        // Free the partial allocation so macroblocks_base is NULL again;
        // decode_frame_header() uses that to decide whether to call us,
        // and the codec context already carries the new dimensions.
        vp8_decode_flush(s->avctx);
        return AVERROR(ENOMEM);
    }

    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}
257  
258 
/**
 * Read the segmentation part of the frame header from the header
 * range coder: the update_map flag, optional per-segment quantizer and
 * filter level values, and optional segment id probabilities.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            // a probability that is not transmitted defaults to 255
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
278  
279 
/**
 * Read the four per-reference and four per-mode loop filter deltas
 * (6-bit signed values each) from the header range coder.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);

    for (i = 0; i < 4; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
290  
291 
/**
 * Read the coefficient partition count and initialise one range decoder
 * per partition.
 *
 * The data starts with a 3-byte little-endian size for every partition
 * except the last; the last partition takes whatever bytes remain.
 *
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the sizes do not fit into buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    // skip over the size table
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    // i == num_coeff_partitions-1 here: the last partition gets the rest
    vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
316  
317 
/**
 * Read the quantizer indices from the frame header and fill the
 * dequantization factors in s->qmat[] for all four segments.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            // segment values are either absolute or relative to yac_qi
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        // every table index is clipped to the valid range 0..127
        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
348  
349 
/**

350 
* Determine which buffers golden and altref should be updated with after this frame.

351 
* The spec isn't clear here, so I'm going by my understanding of what libvpx does

352 
*

353 
* Intra frames update all 3 references

354 
* Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set

355 
* If the update (goldenaltref) flag is set, it's updated with the current frame

356 
* if update_last is set, and VP56_FRAME_PREVIOUS otherwise.

357 
* If the flag is not set, the number read means:

358 
* 0: no update

359 
* 1: VP56_FRAME_PREVIOUS

360 
* 2: update golden with altref, or update altref with golden

361 
*/

362 
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 
363 
{ 
364 
VP56RangeCoder *c = &s>c; 
365  
366 
if (update)

367 
return VP56_FRAME_CURRENT;

368  
369 
switch (vp8_rac_get_uint(c, 2)) { 
370 
case 1: 
371 
return VP56_FRAME_PREVIOUS;

372 
case 2: 
373 
return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

374 
} 
375 
return VP56_FRAME_NONE;

376 
} 
377  
378 
/**
 * Read the golden/altref update flags and store the resulting update
 * sources in s->update_golden and s->update_altref.
 * Both flags are read before either call to ref_to_update().
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
388  
389 
/**
 * Parse the frame header: the 3-byte frame tag, the keyframe start code
 * and dimensions, and the range-coded header partition.
 *
 * On keyframes the probabilities and segmentation state are reset to
 * defaults. If the decoder has no tables yet or the size changed,
 * update_dimensions() is called.
 *
 * @param buf      start of the frame data; the first 3 bytes are read
 *                 before any size check
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA on a malformed header, or the
 *         negative error code returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    // keyframes carry 7 extra bytes (start code + dimensions) before the header
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        // The comparison must be applied to the assigned value: with
        // (ret = f() < 0) ret would be 1 on failure and callers testing
        // for a negative return would miss the error.
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // token probabilities are transmitted per coefficient band (j) and
    // stored per coefficient position (m)
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; m < 16; m++)
                            if (vp8_coeff_band[m] == j)
                                s->prob->token[i][m][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
526  
527 
static av_always_inline

528 
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) 
529 
{ 
530 
#define MARGIN (16 << 2) 
531 
dst>x = av_clip(src>x, ((mb_x << 6) + MARGIN),

532 
((s>mb_width  1  mb_x) << 6) + MARGIN); 
533 
dst>y = av_clip(src>y, ((mb_y << 6) + MARGIN),

534 
((s>mb_height  1  mb_y) << 6) + MARGIN); 
535 
} 
536  
537 
static av_always_inline

538 
void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 
539 
VP56mv near[2], VP56mv *best, uint8_t cnt[4]) 
540 
{ 
541 
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, 
542 
mb  1 /* left */, 
543 
mb + 1 /* topleft */ }; 
544 
enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };

545 
VP56mv near_mv[4] = {{ 0 }}; 
546 
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };

547 
int idx = CNT_ZERO;

548 
int best_idx = CNT_ZERO;

549 
int cur_sign_bias = s>sign_bias[mb>ref_frame];

550 
int *sign_bias = s>sign_bias;

551  
552 
/* Process MB on top, left and topleft */

553 
#define MV_EDGE_CHECK(n)\

554 
{\ 
555 
VP8Macroblock *edge = mb_edge[n];\ 
556 
int edge_ref = edge>ref_frame;\

557 
if (edge_ref != VP56_FRAME_CURRENT) {\

558 
uint32_t mv = AV_RN32A(&edge>mv);\ 
559 
if (mv) {\

560 
if (cur_sign_bias != sign_bias[edge_ref]) {\

561 
/* SWAR negate of the values in mv. */\

562 
mv = ~mv;\ 
563 
mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ 
564 
}\ 
565 
if (!n  mv != AV_RN32A(&near_mv[idx]))\

566 
AV_WN32A(&near_mv[++idx], mv);\ 
567 
cnt[idx] += 1 + (n != 2);\ 
568 
} else\

569 
cnt[CNT_ZERO] += 1 + (n != 2);\ 
570 
}\ 
571 
} 
572 
MV_EDGE_CHECK(0)

573 
MV_EDGE_CHECK(1)

574 
MV_EDGE_CHECK(2)

575  
576 
/* If we have three distinct MVs, merge first and last if they're the same */

577 
if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) 
578 
cnt[CNT_NEAREST] += 1;

579  
580 
cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]>mode == VP8_MVMODE_SPLIT) + 
581 
(mb_edge[EDGE_TOP]>mode == VP8_MVMODE_SPLIT)) * 2 +

582 
(mb_edge[EDGE_TOPLEFT]>mode == VP8_MVMODE_SPLIT); 
583  
584 
/* Swap near and nearest if necessary */

585 
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {

586 
FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 
587 
FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 
588 
} 
589  
590 
/* Choose the best mv out of 0,0 and the nearest mv */

591 
if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])

592 
best_idx = CNT_NEAREST; 
593  
594 
mb>mv = near_mv[best_idx]; 
595 
near[0] = near_mv[CNT_NEAREST];

596 
near[1] = near_mv[CNT_NEAR];

597 
} 
598  
599 
/**

600 
* Motion vector coding, 17.1.

601 
*/

602 
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) 
603 
{ 
604 
int bit, x = 0; 
605  
606 
if (vp56_rac_get_prob_branchy(c, p[0])) { 
607 
int i;

608  
609 
for (i = 0; i < 3; i++) 
610 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

611 
for (i = 9; i > 3; i) 
612 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

613 
if (!(x & 0xFFF0)  vp56_rac_get_prob(c, p[12])) 
614 
x += 8;

615 
} else {

616 
// small_mvtree

617 
const uint8_t *ps = p+2; 
618 
bit = vp56_rac_get_prob(c, *ps); 
619 
ps += 1 + 3*bit; 
620 
x += 4*bit;

621 
bit = vp56_rac_get_prob(c, *ps); 
622 
ps += 1 + bit;

623 
x += 2*bit;

624 
x += vp56_rac_get_prob(c, *ps); 
625 
} 
626  
627 
return (x && vp56_rac_get_prob(c, p[1])) ? x : x; 
628 
} 
629  
630 
static av_always_inline

631 
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)

632 
{ 
633 
if (left == top)

634 
return vp8_submv_prob[4!!left]; 
635 
if (!top)

636 
return vp8_submv_prob[2]; 
637 
return vp8_submv_prob[1!!left]; 
638 
} 
639  
640 
/**

641 
* Split motion vector prediction, 16.4.

642 
* @returns the number of motion vectors parsed (2, 4 or 16)

643 
*/

644 
static av_always_inline

645 
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)

646 
{ 
647 
int part_idx;

648 
int n, num;

649 
VP8Macroblock *top_mb = &mb[2];

650 
VP8Macroblock *left_mb = &mb[1];

651 
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb>partitioning],

652 
*mbsplits_top = vp8_mbsplits[top_mb>partitioning], 
653 
*mbsplits_cur, *firstidx; 
654 
VP56mv *top_mv = top_mb>bmv; 
655 
VP56mv *left_mv = left_mb>bmv; 
656 
VP56mv *cur_mv = mb>bmv; 
657  
658 
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { 
659 
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { 
660 
part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);

661 
} else {

662 
part_idx = VP8_SPLITMVMODE_8x8; 
663 
} 
664 
} else {

665 
part_idx = VP8_SPLITMVMODE_4x4; 
666 
} 
667  
668 
num = vp8_mbsplit_count[part_idx]; 
669 
mbsplits_cur = vp8_mbsplits[part_idx], 
670 
firstidx = vp8_mbfirstidx[part_idx]; 
671 
mb>partitioning = part_idx; 
672  
673 
for (n = 0; n < num; n++) { 
674 
int k = firstidx[n];

675 
uint32_t left, above; 
676 
const uint8_t *submv_prob;

677  
678 
if (!(k & 3)) 
679 
left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);

680 
else

681 
left = AV_RN32A(&cur_mv[mbsplits_cur[k  1]]);

682 
if (k <= 3) 
683 
above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);

684 
else

685 
above = AV_RN32A(&cur_mv[mbsplits_cur[k  4]]);

686  
687 
submv_prob = get_submv_prob(left, above); 
688  
689 
if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { 
690 
if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { 
691 
if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { 
692 
mb>bmv[n].y = mb>mv.y + read_mv_component(c, s>prob>mvc[0]);

693 
mb>bmv[n].x = mb>mv.x + read_mv_component(c, s>prob>mvc[1]);

694 
} else {

695 
AV_ZERO32(&mb>bmv[n]); 
696 
} 
697 
} else {

698 
AV_WN32A(&mb>bmv[n], above); 
699 
} 
700 
} else {

701 
AV_WN32A(&mb>bmv[n], left); 
702 
} 
703 
} 
704  
705 
return num;

706 
} 
707  
708 
static av_always_inline

709 
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,

710 
int mb_x, int keyframe) 
711 
{ 
712 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
713 
if (keyframe) {

714 
int x, y;

715 
uint8_t* const top = s>intra4x4_pred_mode_top + 4 * mb_x; 
716 
uint8_t* const left = s>intra4x4_pred_mode_left;

717 
for (y = 0; y < 4; y++) { 
718 
for (x = 0; x < 4; x++) { 
719 
const uint8_t *ctx;

720 
ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; 
721 
*intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 
722 
left[y] = top[x] = *intra4x4; 
723 
intra4x4++; 
724 
} 
725 
} 
726 
} else {

727 
int i;

728 
for (i = 0; i < 16; i++) 
729 
intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 
730 
} 
731 
} 
732  
733 
static av_always_inline

734 
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) 
735 
{ 
736 
VP56RangeCoder *c = &s>c; 
737  
738 
if (s>segmentation.update_map)

739 
*segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s>prob>segmentid); 
740 
s>segment = *segment; 
741  
742 
mb>skip = s>mbskip_enabled ? vp56_rac_get_prob(c, s>prob>mbskip) : 0;

743  
744 
if (s>keyframe) {

745 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 
746  
747 
if (mb>mode == MODE_I4x4) {

748 
decode_intra4x4_modes(s, c, mb_x, 1);

749 
} else {

750 
const uint32_t modes = vp8_pred4x4_mode[mb>mode] * 0x01010101u; 
751 
AV_WN32A(s>intra4x4_pred_mode_top + 4 * mb_x, modes);

752 
AV_WN32A(s>intra4x4_pred_mode_left, modes); 
753 
} 
754  
755 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); 
756 
mb>ref_frame = VP56_FRAME_CURRENT; 
757 
} else if (vp56_rac_get_prob_branchy(c, s>prob>intra)) { 
758 
VP56mv near[2], best;

759 
uint8_t cnt[4] = { 0 }; 
760  
761 
// inter MB, 16.2

762 
if (vp56_rac_get_prob_branchy(c, s>prob>last))

763 
mb>ref_frame = vp56_rac_get_prob(c, s>prob>golden) ? 
764 
VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;

765 
else

766 
mb>ref_frame = VP56_FRAME_PREVIOUS; 
767 
s>ref_count[mb>ref_frame1]++;

768  
769 
// motion vectors, 16.3

770 
find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); 
771 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) { 
772 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) { 
773 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) { 
774 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) { 
775 
mb>mode = VP8_MVMODE_SPLIT; 
776 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
777 
mb>mv = mb>bmv[decode_splitmvs(s, c, mb)  1];

778 
} else {

779 
mb>mode = VP8_MVMODE_NEW; 
780 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
781 
mb>mv.y += + read_mv_component(c, s>prob>mvc[0]);

782 
mb>mv.x += + read_mv_component(c, s>prob>mvc[1]);

783 
} 
784 
} else {

785 
mb>mode = VP8_MVMODE_NEAR; 
786 
clamp_mv(s, &mb>mv, &near[1], mb_x, mb_y);

787 
} 
788 
} else {

789 
mb>mode = VP8_MVMODE_NEAREST; 
790 
clamp_mv(s, &mb>mv, &near[0], mb_x, mb_y);

791 
} 
792 
} else {

793 
mb>mode = VP8_MVMODE_ZERO; 
794 
AV_ZERO32(&mb>mv); 
795 
} 
796 
if (mb>mode != VP8_MVMODE_SPLIT) {

797 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
798 
mb>bmv[0] = mb>mv;

799 
} 
800 
} else {

801 
// intra MB, 16.1

802 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s>prob>pred16x16); 
803  
804 
if (mb>mode == MODE_I4x4)

805 
decode_intra4x4_modes(s, c, mb_x, 0);

806  
807 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s>prob>pred8x8c); 
808 
mb>ref_frame = VP56_FRAME_CURRENT; 
809 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
810 
AV_ZERO32(&mb>bmv[0]);

811 
} 
812 
} 
813  
814 
/**

815 
* @param c arithmetic bitstream reader context

816 
* @param block destination for block coefficients

817 
* @param probs probabilities to use when reading trees from the bitstream

818 
* @param i initial coeff index, 0 unless a separate DC block is coded

819 
* @param zero_nhood the initial prediction context for number of surrounding

820 
* allzero blocks (only left/top, so 02)

821 
* @param qmul array holding the dc/ac dequant factor at position 0/1

822 
* @return 0 if no coeffs were decoded

823 
* otherwise, the index of the last coeff decoded plus one

824 
*/

825 
static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 
826 
uint8_t probs[8][3][NUM_DCT_TOKENS1], 
827 
int i, int zero_nhood, int16_t qmul[2]) 
828 
{ 
829 
uint8_t *token_prob = probs[i][zero_nhood]; 
830 
int nonzero = 0; 
831 
int coeff;

832  
833 
do {

834 
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 
835 
return nonzero;

836  
837 
skip_eob:

838 
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 
839 
if (++i == 16) 
840 
return nonzero; // invalid input; blocks should end with EOB 
841 
token_prob = probs[i][0];

842 
goto skip_eob;

843 
} 
844  
845 
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 
846 
coeff = 1;

847 
token_prob = probs[i+1][1]; 
848 
} else {

849 
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 
850 
coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);

851 
if (coeff)

852 
coeff += vp56_rac_get_prob(c, token_prob[5]);

853 
coeff += 2;

854 
} else {

855 
// DCT_CAT*

856 
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { 
857 
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 
858 
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); 
859 
} else { // DCT_CAT2 
860 
coeff = 7;

861 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; 
862 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);

863 
} 
864 
} else { // DCT_CAT3 and up 
865 
int a = vp56_rac_get_prob(c, token_prob[8]); 
866 
int b = vp56_rac_get_prob(c, token_prob[9+a]); 
867 
int cat = (a<<1) + b; 
868 
coeff = 3 + (8<<cat); 
869 
coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 
870 
} 
871 
} 
872 
token_prob = probs[i+1][2]; 
873 
} 
874  
875 
// todo: full [16] qmat? load into register?

876 
block[zigzag_scan[i]] = (vp8_rac_get(c) ? coeff : coeff) * qmul[!!i]; 
877 
nonzero = ++i; 
878 
} while (i < 16); 
879  
880 
return nonzero;

881 
} 
882  
883 
static av_always_inline

884 
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,

885 
uint8_t t_nnz[9], uint8_t l_nnz[9]) 
886 
{ 
887 
int i, x, y, luma_start = 0, luma_ctx = 3; 
888 
int nnz_pred, nnz, nnz_total = 0; 
889 
int segment = s>segment;

890 
int block_dc = 0; 
891  
892 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

893 
nnz_pred = t_nnz[8] + l_nnz[8]; 
894  
895 
// decode DC values and do hadamard

896 
nnz = decode_block_coeffs(c, s>block_dc, s>prob>token[1], 0, nnz_pred, 
897 
s>qmat[segment].luma_dc_qmul); 
898 
l_nnz[8] = t_nnz[8] = !!nnz; 
899 
if (nnz) {

900 
nnz_total += nnz; 
901 
block_dc = 1;

902 
if (nnz == 1) 
903 
s>vp8dsp.vp8_luma_dc_wht_dc(s>block, s>block_dc); 
904 
else

905 
s>vp8dsp.vp8_luma_dc_wht(s>block, s>block_dc); 
906 
} 
907 
luma_start = 1;

908 
luma_ctx = 0;

909 
} 
910  
911 
// luma blocks

912 
for (y = 0; y < 4; y++) 
913 
for (x = 0; x < 4; x++) { 
914 
nnz_pred = l_nnz[y] + t_nnz[x]; 
915 
nnz = decode_block_coeffs(c, s>block[y][x], s>prob>token[luma_ctx], luma_start, 
916 
nnz_pred, s>qmat[segment].luma_qmul); 
917 
// nnz+block_dc may be one more than the actual last index, but we don't care

918 
s>non_zero_count_cache[y][x] = nnz + block_dc; 
919 
t_nnz[x] = l_nnz[y] = !!nnz; 
920 
nnz_total += nnz; 
921 
} 
922  
923 
// chroma blocks

924 
// TODO: what to do about dimensions? 2nd dim for luma is x,

925 
// but for chroma it's (y<<1)x

926 
for (i = 4; i < 6; i++) 
927 
for (y = 0; y < 2; y++) 
928 
for (x = 0; x < 2; x++) { 
929 
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; 
930 
nnz = decode_block_coeffs(c, s>block[i][(y<<1)+x], s>prob>token[2], 0, 
931 
nnz_pred, s>qmat[segment].chroma_qmul); 
932 
s>non_zero_count_cache[i][(y<<1)+x] = nnz;

933 
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; 
934 
nnz_total += nnz; 
935 
} 
936  
937 
// if there were no coded coeffs despite the macroblock not being marked skip,

938 
// we MUST not do the inner loop filter and should not do IDCT

939 
// Since skip isn't used for bitstream prediction, just manually set it.

940 
if (!nnz_total)

941 
mb>skip = 1;

942 
} 
943  
944 
static av_always_inline

945 
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

946 
int linesize, int uvlinesize, int simple) 
947 
{ 
948 
AV_COPY128(top_border, src_y + 15*linesize);

949 
if (!simple) {

950 
AV_COPY64(top_border+16, src_cb + 7*uvlinesize); 
951 
AV_COPY64(top_border+24, src_cr + 7*uvlinesize); 
952 
} 
953 
} 
954  
955 
static av_always_inline

956 
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

957 
int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, 
958 
int simple, int xchg) 
959 
{ 
960 
uint8_t *top_border_m1 = top_border32; // for TL prediction 
961 
src_y = linesize; 
962 
src_cb = uvlinesize; 
963 
src_cr = uvlinesize; 
964  
965 
#define XCHG(a,b,xchg) do { \ 
966 
if (xchg) AV_SWAP64(b,a); \

967 
else AV_COPY64(b,a); \

968 
} while (0) 
969  
970 
XCHG(top_border_m1+8, src_y8, xchg); 
971 
XCHG(top_border, src_y, xchg); 
972 
XCHG(top_border+8, src_y+8, 1); 
973 
if (mb_x < mb_width1) 
974 
XCHG(top_border+32, src_y+16, 1); 
975  
976 
// only copy chroma for normal loop filter

977 
// or to initialize the top row to 127

978 
if (!simple  !mb_y) {

979 
XCHG(top_border_m1+16, src_cb8, xchg); 
980 
XCHG(top_border_m1+24, src_cr8, xchg); 
981 
XCHG(top_border+16, src_cb, 1); 
982 
XCHG(top_border+24, src_cr, 1); 
983 
} 
984 
} 
985  
986 
static av_always_inline

987 
int check_intra_pred_mode(int mode, int mb_x, int mb_y) 
988 
{ 
989 
if (mode == DC_PRED8x8) {

990 
if (!mb_x) {

991 
mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 
992 
} else if (!mb_y) { 
993 
mode = LEFT_DC_PRED8x8; 
994 
} 
995 
} 
996 
return mode;

997 
} 
998  
999 
static av_always_inline

1000 
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1001 
int mb_x, int mb_y) 
1002 
{ 
1003 
int x, y, mode, nnz, tr;

1004  
1005 
// for the first row, we need to run xchg_mb_border to init the top edge to 127

1006 
// otherwise, skip it if we aren't going to deblock

1007 
if (s>deblock_filter  !mb_y)

1008 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
1009 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
1010 
s>filter.simple, 1);

1011  
1012 
if (mb>mode < MODE_I4x4) {

1013 
mode = check_intra_pred_mode(mb>mode, mb_x, mb_y); 
1014 
s>hpc.pred16x16[mode](dst[0], s>linesize);

1015 
} else {

1016 
uint8_t *ptr = dst[0];

1017 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
1018  
1019 
// all blocks on the right edge of the macroblock use bottom edge

1020 
// the top macroblock for their topright edge

1021 
uint8_t *tr_right = ptr  s>linesize + 16;

1022  
1023 
// if we're on the right edge of the frame, said edge is extended

1024 
// from the top macroblock

1025 
if (mb_x == s>mb_width1) { 
1026 
tr = tr_right[1]*0x01010101; 
1027 
tr_right = (uint8_t *)&tr; 
1028 
} 
1029  
1030 
if (mb>skip)

1031 
AV_ZERO128(s>non_zero_count_cache); 
1032  
1033 
for (y = 0; y < 4; y++) { 
1034 
uint8_t *topright = ptr + 4  s>linesize;

1035 
for (x = 0; x < 4; x++) { 
1036 
if (x == 3) 
1037 
topright = tr_right; 
1038  
1039 
s>hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s>linesize);

1040  
1041 
nnz = s>non_zero_count_cache[y][x]; 
1042 
if (nnz) {

1043 
if (nnz == 1) 
1044 
s>vp8dsp.vp8_idct_dc_add(ptr+4*x, s>block[y][x], s>linesize);

1045 
else

1046 
s>vp8dsp.vp8_idct_add(ptr+4*x, s>block[y][x], s>linesize);

1047 
} 
1048 
topright += 4;

1049 
} 
1050  
1051 
ptr += 4*s>linesize;

1052 
intra4x4 += 4;

1053 
} 
1054 
} 
1055  
1056 
mode = check_intra_pred_mode(s>chroma_pred_mode, mb_x, mb_y); 
1057 
s>hpc.pred8x8[mode](dst[1], s>uvlinesize);

1058 
s>hpc.pred8x8[mode](dst[2], s>uvlinesize);

1059  
1060 
if (s>deblock_filter  !mb_y)

1061 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
1062 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
1063 
s>filter.simple, 0);

1064 
} 
1065  
1066 
/**

1067 
* Generic MC function.

1068 
*

1069 
* @param s VP8 decoding context

1070 
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes

1071 
* @param dst target buffer for block data at block position

1072 
* @param src reference picture buffer at origin (0, 0)

1073 
* @param mv motion vector (relative to block position) to get pixel data from

1074 
* @param x_off horizontal position of block from origin (0, 0)

1075 
* @param y_off vertical position of block from origin (0, 0)

1076 
* @param block_w width of block (16, 8 or 4)

1077 
* @param block_h height of block (always same as block_w)

1078 
* @param width width of src/dst plane data

1079 
* @param height height of src/dst plane data

1080 
* @param linesize size of a single line of plane data, including padding

1081 
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)

1082 
*/

1083 
static av_always_inline

1084 
void vp8_mc(VP8Context *s, int luma, 
1085 
uint8_t *dst, uint8_t *src, const VP56mv *mv,

1086 
int x_off, int y_off, int block_w, int block_h, 
1087 
int width, int height, int linesize, 
1088 
vp8_mc_func mc_func[3][3]) 
1089 
{ 
1090 
if (AV_RN32A(mv)) {

1091 
static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; 
1092 
int mx = (mv>x << luma)&7, mx_idx = idx[mx]; 
1093 
int my = (mv>y << luma)&7, my_idx = idx[my]; 
1094  
1095 
x_off += mv>x >> (3  luma);

1096 
y_off += mv>y >> (3  luma);

1097  
1098 
// edge emulation

1099 
src += y_off * linesize + x_off; 
1100 
if (x_off < 2  x_off >= width  block_w  3  
1101 
y_off < 2  y_off >= height  block_h  3) { 
1102 
ff_emulated_edge_mc(s>edge_emu_buffer, src  2 * linesize  2, linesize, 
1103 
block_w + 5, block_h + 5, 
1104 
x_off  2, y_off  2, width, height); 
1105 
src = s>edge_emu_buffer + 2 + linesize * 2; 
1106 
} 
1107 
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 
1108 
} else

1109 
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 
1110 
} 
1111  
1112 
static av_always_inline

1113 
void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 
1114 
AVFrame *ref_frame, int x_off, int y_off, 
1115 
int bx_off, int by_off, 
1116 
int block_w, int block_h, 
1117 
int width, int height, VP56mv *mv) 
1118 
{ 
1119 
VP56mv uvmv = *mv; 
1120  
1121 
/* Y */

1122 
vp8_mc(s, 1, dst[0] + by_off * s>linesize + bx_off, 
1123 
ref_frame>data[0], mv, x_off + bx_off, y_off + by_off,

1124 
block_w, block_h, width, height, s>linesize, 
1125 
s>put_pixels_tab[block_w == 8]);

1126  
1127 
/* U/V */

1128 
if (s>profile == 3) { 
1129 
uvmv.x &= ~7;

1130 
uvmv.y &= ~7;

1131 
} 
1132 
x_off >>= 1; y_off >>= 1; 
1133 
bx_off >>= 1; by_off >>= 1; 
1134 
width >>= 1; height >>= 1; 
1135 
block_w >>= 1; block_h >>= 1; 
1136 
vp8_mc(s, 0, dst[1] + by_off * s>uvlinesize + bx_off, 
1137 
ref_frame>data[1], &uvmv, x_off + bx_off, y_off + by_off,

1138 
block_w, block_h, width, height, s>uvlinesize, 
1139 
s>put_pixels_tab[1 + (block_w == 4)]); 
1140 
vp8_mc(s, 0, dst[2] + by_off * s>uvlinesize + bx_off, 
1141 
ref_frame>data[2], &uvmv, x_off + bx_off, y_off + by_off,

1142 
block_w, block_h, width, height, s>uvlinesize, 
1143 
s>put_pixels_tab[1 + (block_w == 4)]); 
1144 
} 
1145  
1146 
/* Fetch pixels for estimated mv 4 macroblocks ahead.

1147 
* Optimized for 64byte cache lines. Inspired by ffh264 prefetch_motion. */

1148 
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 
1149 
{ 
1150 
/* Don't prefetch refs that haven't been used very often this frame. */

1151 
if (s>ref_count[ref1] > (mb_xy >> 5)) { 
1152 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1153 
int mx = mb>mv.x + x_off + 8; 
1154 
int my = mb>mv.y + y_off;

1155 
uint8_t **src= s>framep[ref]>data; 
1156 
int off= mx + (my + (mb_x&3)*4)*s>linesize + 64; 
1157 
s>dsp.prefetch(src[0]+off, s>linesize, 4); 
1158 
off= (mx>>1) + ((my>>1) + (mb_x&7))*s>uvlinesize + 64; 
1159 
s>dsp.prefetch(src[1]+off, src[2]src[1], 2); 
1160 
} 
1161 
} 
1162  
1163 
/**

1164 
* Apply motion vectors to prediction buffer, chapter 18.

1165 
*/

1166 
static av_always_inline

1167 
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1168 
int mb_x, int mb_y) 
1169 
{ 
1170 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1171 
int width = 16*s>mb_width, height = 16*s>mb_height; 
1172 
AVFrame *ref = s>framep[mb>ref_frame]; 
1173 
VP56mv *bmv = mb>bmv; 
1174  
1175 
if (mb>mode < VP8_MVMODE_SPLIT) {

1176 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1177 
0, 0, 16, 16, width, height, &mb>mv); 
1178 
} else switch (mb>partitioning) { 
1179 
case VP8_SPLITMVMODE_4x4: {

1180 
int x, y;

1181 
VP56mv uvmv; 
1182  
1183 
/* Y */

1184 
for (y = 0; y < 4; y++) { 
1185 
for (x = 0; x < 4; x++) { 
1186 
vp8_mc(s, 1, dst[0] + 4*y*s>linesize + x*4, 
1187 
ref>data[0], &bmv[4*y + x], 
1188 
4*x + x_off, 4*y + y_off, 4, 4, 
1189 
width, height, s>linesize, 
1190 
s>put_pixels_tab[2]);

1191 
} 
1192 
} 
1193  
1194 
/* U/V */

1195 
x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; 
1196 
for (y = 0; y < 2; y++) { 
1197 
for (x = 0; x < 2; x++) { 
1198 
uvmv.x = mb>bmv[ 2*y * 4 + 2*x ].x + 
1199 
mb>bmv[ 2*y * 4 + 2*x+1].x + 
1200 
mb>bmv[(2*y+1) * 4 + 2*x ].x + 
1201 
mb>bmv[(2*y+1) * 4 + 2*x+1].x; 
1202 
uvmv.y = mb>bmv[ 2*y * 4 + 2*x ].y + 
1203 
mb>bmv[ 2*y * 4 + 2*x+1].y + 
1204 
mb>bmv[(2*y+1) * 4 + 2*x ].y + 
1205 
mb>bmv[(2*y+1) * 4 + 2*x+1].y; 
1206 
uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT1))) >> 2; 
1207 
uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT1))) >> 2; 
1208 
if (s>profile == 3) { 
1209 
uvmv.x &= ~7;

1210 
uvmv.y &= ~7;

1211 
} 
1212 
vp8_mc(s, 0, dst[1] + 4*y*s>uvlinesize + x*4, 
1213 
ref>data[1], &uvmv,

1214 
4*x + x_off, 4*y + y_off, 4, 4, 
1215 
width, height, s>uvlinesize, 
1216 
s>put_pixels_tab[2]);

1217 
vp8_mc(s, 0, dst[2] + 4*y*s>uvlinesize + x*4, 
1218 
ref>data[2], &uvmv,

1219 
4*x + x_off, 4*y + y_off, 4, 4, 
1220 
width, height, s>uvlinesize, 
1221 
s>put_pixels_tab[2]);

1222 
} 
1223 
} 
1224 
break;

1225 
} 
1226 
case VP8_SPLITMVMODE_16x8:

1227 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1228 
0, 0, 16, 8, width, height, &bmv[0]); 
1229 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1230 
0, 8, 16, 8, width, height, &bmv[1]); 
1231 
break;

1232 
case VP8_SPLITMVMODE_8x16:

1233 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1234 
0, 0, 8, 16, width, height, &bmv[0]); 
1235 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1236 
8, 0, 8, 16, width, height, &bmv[1]); 
1237 
break;

1238 
case VP8_SPLITMVMODE_8x8:

1239 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1240 
0, 0, 8, 8, width, height, &bmv[0]); 
1241 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1242 
8, 0, 8, 8, width, height, &bmv[1]); 
1243 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1244 
0, 8, 8, 8, width, height, &bmv[2]); 
1245 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1246 
8, 8, 8, 8, width, height, &bmv[3]); 
1247 
break;

1248 
} 
1249 
} 
1250  
1251 
/**
 * Add the inverse-transformed residual of a macroblock to the prediction.
 *
 * Luma is skipped for MODE_I4x4. Each row of four blocks is tested via one
 * 32-bit read of its non-zero counts: if every count is 0 or 1 the combined
 * DC-only routine handles the whole row, otherwise each block is dispatched
 * individually (DC-only add for a count of 1, full IDCT otherwise).
 *
 * @param s   decoder context (block coefficients and non_zero_count_cache)
 * @param dst pointers to the macroblock's Y, Cb and Cr data
 * @param mb  macroblock being decoded (only the mode is read)
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    /* at least one block has more than a DC coefficient */
                    for (x = 0; x < 4; x++) {
                        int nnz = s->non_zero_count_cache[y][x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                            else
                                s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        }
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                            else
                                s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        }
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
    }
}
1301  
1302 
/**
 * Compute the loop filter parameters for one macroblock.
 *
 * The base level comes from the frame header or the current segment, is
 * adjusted by the per-reference and per-mode deltas when enabled, and is
 * clipped to 0..63. The interior limit is derived from the level and the
 * sharpness setting and is at least 1.
 *
 * @param s  decoder context (filter, segmentation and lf_delta settings)
 * @param mb macroblock whose mode, reference frame and skip flag are read
 * @param f  output: filter_level, inner_limit and inner_filter are written
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];

        if (mb->ref_frame == VP56_FRAME_CURRENT) {
            if (mb->mode == MODE_I4x4)
                filter_level += s->lf_delta.mode[0];
        } else {
            if (mb->mode == VP8_MVMODE_ZERO)
                filter_level += s->lf_delta.mode[1];
            else if (mb->mode == VP8_MVMODE_SPLIT)
                filter_level += s->lf_delta.mode[3];
            else
                filter_level += s->lf_delta.mode[2];
        }
    }
    filter_level = av_clip(filter_level, 0, 63);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= s->filter.sharpness > 4 ? 2 : 1;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    /* Inner edges are filtered unless the MB is skipped and has no
     * sub-block structure (neither I4x4 nor split MV). */
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1341  
1342 
/**
 * Apply the normal (non-simple) loop filter to one macroblock.
 *
 * Order: left macroblock edge (if mb_x != 0), inner vertical edges, top
 * macroblock edge (if mb_y != 0), inner horizontal edges. Nothing is done
 * when the filter level is 0.
 *
 * @param s    decoder context
 * @param dst  pointers to the macroblock's Y, Cb and Cr data
 * @param f    filter strength computed for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;

    if (!filter_level)
        return;

    mbedge_lim = 2*(filter_level+2) + inner_limit;
     bedge_lim = 2* filter_level    + inner_limit;
    hev_thresh = filter_level >= 15;

    /* The high-edge-variance threshold steps up with the filter level;
     * the break points differ between key frames and inter frames. */
    if (s->keyframe) {
        if (filter_level >= 40)
            hev_thresh = 2;
    } else {
        if (filter_level >= 40)
            hev_thresh = 3;
        else if (filter_level >= 20)
            hev_thresh = 2;
    }

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1410  
1411 
/**
 * Apply the simple loop filter (luma only) to one macroblock.
 *
 * Order: left macroblock edge (if mb_x != 0), inner vertical edges, top
 * macroblock edge (if mb_y != 0), inner horizontal edges. Nothing is done
 * when the filter level is 0.
 *
 * @param s    decoder context
 * @param dst  pointer to the macroblock's luma data
 * @param f    filter strength computed for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    mbedge_lim = 2*(filter_level+2) + inner_limit;
     bedge_lim = 2* filter_level    + inner_limit;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
1441  
1442 
/**
 * Run the normal loop filter over one macroblock row of the current frame.
 *
 * Before each macroblock is filtered, its unfiltered bottom row is saved to
 * s->top_border[mb_x+1].
 *
 * @param s    decoder context; s->filter_strength holds one entry per column
 * @param mb_y macroblock row to filter
 */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst[3] = {
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8*mb_y*s->uvlinesize,
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8*mb_y*s->uvlinesize
    };
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, f++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}
1460  
1461 
/**
 * Run the simple (luma-only) loop filter over one macroblock row of the
 * current frame.
 *
 * Before each macroblock is filtered, its unfiltered bottom luma row is
 * saved to s->top_border[mb_x+1]; chroma is not backed up.
 *
 * @param s    decoder context; s->filter_strength holds one entry per column
 * @param mb_y macroblock row to filter
 */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, dst, f++, mb_x, mb_y);
        dst += 16;
    }
}
1473  
1474 
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 
1475 
AVPacket *avpkt) 
1476 
{ 
1477 
VP8Context *s = avctx>priv_data; 
1478 
int ret, mb_x, mb_y, i, y, referenced;

1479 
enum AVDiscard skip_thresh;

1480 
AVFrame *av_uninit(curframe); 
1481  
1482 
if ((ret = decode_frame_header(s, avpkt>data, avpkt>size)) < 0) 
1483 
return ret;

1484  
1485 
referenced = s>update_last  s>update_golden == VP56_FRAME_CURRENT 
1486 
 s>update_altref == VP56_FRAME_CURRENT; 
1487  
1488 
skip_thresh = !referenced ? AVDISCARD_NONREF : 
1489 
!s>keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; 
1490  
1491 
if (avctx>skip_frame >= skip_thresh) {

1492 
s>invisible = 1;

1493 
goto skip_decode;

1494 
} 
1495 
s>deblock_filter = s>filter.level && avctx>skip_loop_filter < skip_thresh; 
1496  
1497 
for (i = 0; i < 4; i++) 
1498 
if (&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] &&

1499 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1500 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) { 
1501 
curframe = s>framep[VP56_FRAME_CURRENT] = &s>frames[i]; 
1502 
break;

1503 
} 
1504 
if (curframe>data[0]) 
1505 
avctx>release_buffer(avctx, curframe); 
1506  
1507 
curframe>key_frame = s>keyframe; 
1508 
curframe>pict_type = s>keyframe ? FF_I_TYPE : FF_P_TYPE; 
1509 
curframe>reference = referenced ? 3 : 0; 
1510 
if ((ret = avctx>get_buffer(avctx, curframe))) {

1511 
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

1512 
return ret;

1513 
} 
1514  
1515 
// Given that arithmetic probabilities are updated every frame, it's quite likely

1516 
// that the values we have on a random interframe are complete junk if we didn't

1517 
// start decode on a keyframe. So just don't display anything rather than junk.

1518 
if (!s>keyframe && (!s>framep[VP56_FRAME_PREVIOUS] 

1519 
!s>framep[VP56_FRAME_GOLDEN]  
1520 
!s>framep[VP56_FRAME_GOLDEN2])) { 
1521 
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");

1522 
return AVERROR_INVALIDDATA;

1523 
} 
1524  
1525 
s>linesize = curframe>linesize[0];

1526 
s>uvlinesize = curframe>linesize[1];

1527  
1528 
if (!s>edge_emu_buffer)

1529 
s>edge_emu_buffer = av_malloc(21*s>linesize);

1530  
1531 
memset(s>top_nnz, 0, s>mb_width*sizeof(*s>top_nnz)); 
1532  
1533 
/* Zero macroblock structures for top/left prediction from outside the frame. */

1534 
memset(s>macroblocks, 0, (s>mb_width + s>mb_height*2)*sizeof(*s>macroblocks)); 
1535  
1536 
// top edge of 127 for intra prediction

1537 
memset(s>top_border, 127, (s>mb_width+1)*sizeof(*s>top_border)); 
1538 
memset(s>ref_count, 0, sizeof(s>ref_count)); 
1539 
if (s>keyframe)

1540 
memset(s>intra4x4_pred_mode_top, DC_PRED, s>b4_stride*4);

1541  
1542 
for (mb_y = 0; mb_y < s>mb_height; mb_y++) { 
1543 
VP56RangeCoder *c = &s>coeff_partition[mb_y & (s>num_coeff_partitions1)];

1544 
VP8Macroblock *mb = s>macroblocks + (s>mb_height  mb_y  1)*2; 
1545 
uint8_t *segment_map = s>segmentation_map + mb_y*s>mb_stride; 
1546 
int mb_xy = mb_y * s>mb_stride;

1547 
uint8_t *dst[3] = {

1548 
curframe>data[0] + 16*mb_y*s>linesize, 
1549 
curframe>data[1] + 8*mb_y*s>uvlinesize, 
1550 
curframe>data[2] + 8*mb_y*s>uvlinesize 
1551 
}; 
1552  
1553 
memset(s>left_nnz, 0, sizeof(s>left_nnz)); 
1554 
AV_WN32A(s>intra4x4_pred_mode_left, DC_PRED*0x01010101);

1555  
1556 
// left edge of 129 for intra prediction

1557 
if (!(avctx>flags & CODEC_FLAG_EMU_EDGE))

1558 
for (i = 0; i < 3; i++) 
1559 
for (y = 0; y < 16>>!!i; y++) 
1560 
dst[i][y*curframe>linesize[i]1] = 129; 
1561 
if (mb_y)

1562 
memset(s>top_border, 129, sizeof(*s>top_border)); 
1563  
1564 
for (mb_x = 0; mb_x < s>mb_width; mb_x++, mb_xy++, mb++) { 
1565 
uint8_t *segment_mb = segment_map+mb_x; 
1566  
1567 
/* Prefetch the current frame, 4 MBs ahead */

1568 
s>dsp.prefetch(dst[0] + (mb_x&3)*4*s>linesize + 64, s>linesize, 4); 
1569 
s>dsp.prefetch(dst[1] + (mb_x&7)*s>uvlinesize + 64, dst[2]  dst[1], 2); 
1570  
1571 
decode_mb_mode(s, mb, mb_x, mb_y, segment_mb); 
1572  
1573 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 
1574  
1575 
if (!mb>skip)

1576 
decode_mb_coeffs(s, c, mb, s>top_nnz[mb_x], s>left_nnz); 
1577  
1578 
if (mb>mode <= MODE_I4x4)

1579 
intra_predict(s, dst, mb, mb_x, mb_y); 
1580 
else

1581 
inter_predict(s, dst, mb, mb_x, mb_y); 
1582  
1583 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 
1584  
1585 
if (!mb>skip) {

1586 
idct_mb(s, dst, mb); 
1587 
} else {

1588 
AV_ZERO64(s>left_nnz); 
1589 
AV_WN64(s>top_nnz[mb_x], 0); // array of 9, so unaligned 
1590  
1591 
// Reset DC block predictors if they would exist if the mb had coefficients

1592 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

1593 
s>left_nnz[8] = 0; 
1594 
s>top_nnz[mb_x][8] = 0; 
1595 
} 
1596 
} 
1597  
1598 
if (s>deblock_filter)

1599 
filter_level_for_mb(s, mb, &s>filter_strength[mb_x]); 
1600  
1601 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 
1602  
1603 
dst[0] += 16; 
1604 
dst[1] += 8; 
1605 
dst[2] += 8; 
1606 
} 
1607 
if (s>deblock_filter) {

1608 
if (s>filter.simple)

1609 
filter_mb_row_simple(s, mb_y); 
1610 
else

1611 
filter_mb_row(s, mb_y); 
1612 
} 
1613 
} 
1614  
1615 
skip_decode:

1616 
// if future frames don't use the updated probabilities,

1617 
// reset them to the values we saved

1618 
if (!s>update_probabilities)

1619 
s>prob[0] = s>prob[1]; 
1620  
1621 
// check if golden and altref are swapped

1622 
if (s>update_altref == VP56_FRAME_GOLDEN &&

1623 
s>update_golden == VP56_FRAME_GOLDEN2) 
1624 
FFSWAP(AVFrame *, s>framep[VP56_FRAME_GOLDEN], s>framep[VP56_FRAME_GOLDEN2]); 
1625 
else {

1626 
if (s>update_altref != VP56_FRAME_NONE)

1627 
s>framep[VP56_FRAME_GOLDEN2] = s>framep[s>update_altref]; 
1628  
1629 
if (s>update_golden != VP56_FRAME_NONE)

1630 
s>framep[VP56_FRAME_GOLDEN] = s>framep[s>update_golden]; 
1631 
} 
1632  
1633 
if (s>update_last) // move cur>prev 
1634 
s>framep[VP56_FRAME_PREVIOUS] = s>framep[VP56_FRAME_CURRENT]; 
1635  
1636 
// release no longer referenced frames

1637 
for (i = 0; i < 4; i++) 
1638 
if (s>frames[i].data[0] && 
1639 
&s>frames[i] != s>framep[VP56_FRAME_CURRENT] && 
1640 
&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] && 
1641 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1642 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) 
1643 
avctx>release_buffer(avctx, &s>frames[i]); 
1644  
1645 
if (!s>invisible) {

1646 
*(AVFrame*)data = *s>framep[VP56_FRAME_CURRENT]; 
1647 
*data_size = sizeof(AVFrame);

1648 
} 
1649  
1650 
return avpkt>size;

1651 
} 
1652  
1653 
/**
 * Initialise the VP8 decoder context.
 *
 * Sets the output pixel format to YUV 4:2:0 planar and initialises the DSP,
 * intra prediction and VP8 DSP function tables.
 *
 * @param avctx codec context whose priv_data is a VP8Context
 * @return 0 on success, AVERROR_PATCHWELCOME if CODEC_FLAG_EMU_EDGE is set
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
    ff_vp8dsp_init(&s->vp8dsp);

    // intra pred needs edge emulation among other things
    if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
        av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");
        return AVERROR_PATCHWELCOME;
    }

    return 0;
}
1672  
1673 
/**
 * Tear down the decoder: all cleanup is delegated to vp8_decode_flush().
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);

    return 0;
}
1678  
1679 
AVCodec vp8_decoder = { 
1680 
"vp8",

1681 
AVMEDIA_TYPE_VIDEO, 
1682 
CODEC_ID_VP8, 
1683 
sizeof(VP8Context),

1684 
vp8_decode_init, 
1685 
NULL,

1686 
vp8_decode_free, 
1687 
vp8_decode_frame, 
1688 
CODEC_CAP_DR1, 
1689 
.flush = vp8_decode_flush, 
1690 
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),

1691 
}; 