Commit 5c8703d4 authored by Ondřej Nový

Merge tag 'debian/2%0.148.2748+git97eaef2-1_bpo8+1' into jessie-backports

tagging package x264 version debian/2%0.148.2748+git97eaef2-1_bpo8+1
parents 5d7995a4 60ef258d
......@@ -1254,8 +1254,9 @@ load_deinterleave_chroma:
endfunc
function x264_plane_copy_core_neon, export=1
add x8, x4, #15
and x4, x8, #~15
add w8, w4, #15 // 32-bit write clears the upper 32 bits of the register
and w4, w8, #~15
// safe use of the full reg since negative width makes no sense
sub x1, x1, x4
sub x3, x3, x4
1:
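In C terms, the new w-register arithmetic rounds the copy width up to the next multiple of 16. A minimal sketch of the idea (hypothetical helper, not part of the patch):

/* On AArch64 a 32-bit register write zero-extends into the full 64-bit
 * register, so the rounded width is safe in the 64-bit subs that follow. */
static inline long round_width_16( int w )
{
    return ((unsigned)w + 15) & ~15u;   /* e.g. 17 -> 32, 16 -> 16 */
}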
......@@ -1281,6 +1282,34 @@ function x264_plane_copy_core_neon, export=1
ret
endfunc
function x264_plane_copy_swap_core_neon, export=1
lsl w4, w4, #1
sub x1, x1, x4
sub x3, x3, x4
1:
mov w8, w4
tbz w4, #4, 32f
subs w8, w8, #16
ld1 {v0.16b}, [x2], #16
rev16 v0.16b, v0.16b
st1 {v0.16b}, [x0], #16
b.eq 0f
32:
subs w8, w8, #32
ld1 {v0.16b,v1.16b}, [x2], #32
rev16 v0.16b, v0.16b
rev16 v1.16b, v1.16b
st1 {v0.16b,v1.16b}, [x0], #32
b.gt 32b
0:
subs w5, w5, #1
add x2, x2, x3
add x0, x0, x1
b.gt 1b
ret
endfunc
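For reference, a plain-C sketch of what the rev16 loop computes: every adjacent byte pair in a row is swapped, with w counted in pairs to match the lsl w4, w4, #1 above. This is written here from the asm, so treat it as an approximation of the C fallback rather than x264's exact code:

#include <stdint.h>

static void plane_copy_swap_c( uint8_t *dst, intptr_t i_dst,
                               uint8_t *src, intptr_t i_src, int w, int h )
{
    for( int y = 0; y < h; y++, dst += i_dst, src += i_src )
        for( int x = 0; x < 2*w; x += 2 )
        {
            dst[x]   = src[x+1];   /* rev16 swaps adjacent bytes */
            dst[x+1] = src[x];
        }
}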
function x264_plane_copy_deinterleave_neon, export=1
add w9, w6, #15
and w9, w9, #0xfffffff0
......
......@@ -51,6 +51,8 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h );
......@@ -152,7 +154,7 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explicit if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
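A worked example of the index math (plain values, no x264 API assumed): with mvx = 3 and mvy = 1,

int mvx = 3, mvy = 1;
int qpel_idx = ((mvy&3)<<2) + (mvx&3);   /* (1<<2) + 3 = 7 */
/* bits 0 and 2 flag quarter-pel offsets, so qpel_idx & 5 != 0 and the
 * two half-pel planes are averaged; the explicit if() above adds one
 * extra stride only when mvy&3 == 3. */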
......@@ -178,7 +180,7 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explicit if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
......@@ -208,6 +210,7 @@ void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
int height, int16_t *buf );
PLANE_COPY(16, neon)
PLANE_COPY_SWAP(16, neon)
PLANE_INTERLEAVE(neon)
#endif // !HIGH_BIT_DEPTH
......@@ -232,6 +235,7 @@ void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
pf->plane_copy = x264_plane_copy_neon;
pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
......
......@@ -54,13 +54,13 @@ void x264_predict_16x16_p_neon( uint8_t *src );
void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] )
{
#if !HIGH_BIT_DEPTH
if (cpu&X264_CPU_ARMV8)
if( cpu&X264_CPU_ARMV8 )
{
pf[I_PRED_4x4_H] = x264_predict_4x4_h_aarch64;
pf[I_PRED_4x4_V] = x264_predict_4x4_v_aarch64;
}
if (cpu&X264_CPU_NEON)
if( cpu&X264_CPU_NEON )
{
pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_neon;
pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_neon;
......@@ -73,11 +73,12 @@ void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] )
void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] )
{
#if !HIGH_BIT_DEPTH
if (cpu&X264_CPU_ARMV8) {
if( cpu&X264_CPU_ARMV8 )
{
pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_aarch64;
}
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_neon;
......@@ -91,7 +92,7 @@ void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] )
void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......@@ -106,7 +107,7 @@ void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] )
void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......@@ -124,7 +125,7 @@ void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_pred
void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......
......@@ -211,8 +211,8 @@ endfunc
vclt.u8 q13, q4, q14 @ < (alpha >> 2) + 2 if_2
vand q12, q7, q6 @ if_1
vshrn.u16 d28, q12, #4
vcmp.f64 d28, #0
vmrs APSR_nzcv, FPSCR
vmov r2, lr, d28
orrs r2, r2, lr
beq 9f
sub sp, sp, #32
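The replaced vcmp.f64/vmrs pair compared the NEON register as a VFP double, and a float compare treats some nonzero bit patterns (such as negative zero) as equal to zero; the new sequence moves d28 into two GPRs and ORs them, testing the raw bits. A scalar sketch of the new test, assuming the eight narrowed lanes are packed into one 64-bit value:

#include <stdint.h>

/* nonzero when any filter flag in d28 is set; mirrors
 * vmov r2, lr, d28 / orrs r2, r2, lr / beq 9f */
static int any_flag_set( uint64_t d28 )
{
    uint32_t lo = (uint32_t)d28;
    uint32_t hi = (uint32_t)(d28 >> 32);
    return (lo | hi) != 0;
}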
......@@ -325,6 +325,7 @@ endfunc
.endm
function x264_deblock_v_luma_intra_neon
push {lr}
vld1.64 {d0, d1}, [r0,:128], r1
vld1.64 {d2, d3}, [r0,:128], r1
vld1.64 {d4, d5}, [r0,:128], r1
......@@ -348,10 +349,11 @@ function x264_deblock_v_luma_intra_neon
vst1.64 {d4, d5}, [r0,:128]
9:
align_pop_regs
bx lr
pop {pc}
endfunc
function x264_deblock_h_luma_intra_neon
push {lr}
sub r0, r0, #4
vld1.64 {d22}, [r0], r1
vld1.64 {d20}, [r0], r1
......@@ -397,7 +399,7 @@ function x264_deblock_h_luma_intra_neon
vst1.64 {d7}, [r0], r1
9:
align_pop_regs
bx lr
pop {pc}
endfunc
.macro h264_loop_filter_chroma
......
......@@ -1818,13 +1818,14 @@ function x264_mbtree_propagate_cost_neon
endfunc
function x264_mbtree_propagate_list_internal_neon
vld2.16 {d4[], d5[]}, [sp] @ bipred_weight, mb_y
vld1.16 {d4[]}, [sp] @ bipred_weight
movrel r12, pw_0to15
vmov.u16 q10, #0xc000
vld1.16 {q0}, [r12, :128] @h->mb.i_mb_x,h->mb.i_mb_y
ldrh r12, [sp, #4]
vmov.u32 q11, #4
vmov.u8 q3, #32
vdup.u16 q8, d5[0] @ mb_y
vdup.u16 q8, r12 @ mb_y
vzip.u16 q0, q8
ldr r12, [sp, #8]
8:
......
......@@ -161,7 +161,7 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explicit if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
......@@ -187,7 +187,7 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explicit if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
......
......@@ -29,7 +29,7 @@
void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
{
if (!(cpu&X264_CPU_ARMV6))
if( !(cpu&X264_CPU_ARMV6) )
return;
#if !HIGH_BIT_DEPTH
......@@ -38,7 +38,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_armv6;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_neon;
......@@ -48,7 +48,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......@@ -63,7 +63,7 @@ void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......@@ -76,7 +76,7 @@ void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] )
void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......@@ -94,7 +94,7 @@ void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_
void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
{
if (!(cpu&X264_CPU_NEON))
if( !(cpu&X264_CPU_NEON) )
return;
#if !HIGH_BIT_DEPTH
......
......@@ -50,8 +50,8 @@ void x264_cabac_block_residual_8x8_rd_internal_sse2 ( dctcoef *l, int b_in
void x264_cabac_block_residual_8x8_rd_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_8x8_rd_internal_ssse3 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_internal_sse2_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
......
......@@ -68,7 +68,7 @@ extern const vlc_t x264_total_zeros_2x4_dc[7][8];
typedef struct
{
uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
uint8_t *(*nal_escape)( uint8_t *dst, uint8_t *src, uint8_t *end );
void (*cabac_block_residual_internal)( dctcoef *l, int b_interlaced,
intptr_t ctx_block_cat, x264_cabac_t *cb );
void (*cabac_block_residual_rd_internal)( dctcoef *l, int b_interlaced,
......
......@@ -58,7 +58,7 @@ extern const uint16_t x264_cabac_entropy[128];
void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
void x264_cabac_encode_init_core( x264_cabac_t *cb );
void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b );
void x264_cabac_encode_decision_asm( x264_cabac_t *cb, int i_ctx, int b );
void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b );
......
......@@ -221,7 +221,6 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
}
else if( !strcasecmp( preset, "veryfast" ) )
{
param->analyse.i_me_method = X264_ME_HEX;
param->analyse.i_subpel_refine = 2;
param->i_frame_reference = 1;
param->analyse.b_mixed_references = 0;
......@@ -250,11 +249,10 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
}
else if( !strcasecmp( preset, "slow" ) )
{
param->analyse.i_me_method = X264_ME_UMH;
param->analyse.i_subpel_refine = 8;
param->i_frame_reference = 5;
param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
param->analyse.i_trellis = 2;
param->rc.i_lookahead = 50;
}
else if( !strcasecmp( preset, "slower" ) )
......@@ -612,7 +610,7 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
#define OPT(STR) else if( !strcmp( name, STR ) )
#define OPT2(STR0, STR1) else if( !strcmp( name, STR0 ) || !strcmp( name, STR1 ) )
if(0);
if( 0 );
OPT("asm")
{
p->cpu = isdigit(value[0]) ? atoi(value) :
......
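A short usage sketch of the parser these OPT() macros build, through x264's public API (both calls are declared in x264.h; the option values are illustrative):

#include <x264.h>

static int setup_params( x264_param_t *p )
{
    if( x264_param_default_preset( p, "slow", NULL ) < 0 )  /* preset table above */
        return -1;
    return x264_param_parse( p, "asm", "none" );            /* dispatches via OPT("asm") */
}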
......@@ -36,7 +36,7 @@
#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
#define XCHG(type,a,b) do{ type t = a; a = b; b = t; } while(0)
#define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 )
#define IS_DISPOSABLE(type) ( type == X264_TYPE_B )
#define FIX8(f) ((int)(f*(1<<8)+.5))
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
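The do { ... } while( 0 ) wrapper is what lets a multi-statement macro behave as one statement; an illustrative use of XCHG (not from the patch):

if( a > b )
    XCHG( int, a, b );   /* expands to a single statement, */
else                     /* so the else still binds correctly */
    b = a;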
......@@ -70,14 +70,14 @@ do {\
var = (void*)prealloc_size;\
preallocs[prealloc_idx++] = (uint8_t**)&var;\
prealloc_size += ALIGN(size, NATIVE_ALIGN);\
} while(0)
} while( 0 )
#define PREALLOC_END( ptr )\
do {\
CHECKED_MALLOC( ptr, prealloc_size );\
while( prealloc_idx-- )\
*preallocs[prealloc_idx] += (intptr_t)ptr;\
} while(0)
} while( 0 )
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
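PREALLOC/PREALLOC_END implement two-pass allocation: the first pass records each member as an offset while accumulating the total size, then one CHECKED_MALLOC is done and every recorded pointer is rebased onto it. A minimal standalone sketch of the same pattern (hypothetical names, alignment handling omitted):

#include <stdint.h>
#include <stdlib.h>

static int alloc_both( float **a, float **b, size_t n )
{
    size_t off_a = 0;                       /* pass 1: offsets only */
    size_t off_b = off_a + n * sizeof(**a);
    size_t total = off_b + n * sizeof(**b);
    uint8_t *base = malloc( total );        /* pass 2: one allocation */
    if( !base )
        return -1;
    *a = (float*)(base + off_a);            /* rebase the pointers */
    *b = (float*)(base + off_b);
    return 0;
}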
......
......@@ -720,10 +720,13 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->sub8x8_dct = x264_sub8x8_dct_altivec;
dctf->sub16x16_dct = x264_sub16x16_dct_altivec;
dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
dctf->add4x4_idct = x264_add4x4_idct_altivec;
dctf->add8x8_idct = x264_add8x8_idct_altivec;
dctf->add16x16_idct = x264_add16x16_idct_altivec;
dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_altivec;
dctf->sub8x8_dct8 = x264_sub8x8_dct8_altivec;
dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
......@@ -990,10 +993,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
if( cpu&X264_CPU_MMX2 )
{
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_mmx2;
pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_mmx2;
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
}
if( cpu&X264_CPU_SSE )
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_sse;
if( cpu&X264_CPU_SSE2_IS_FAST )
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
if( cpu&X264_CPU_SSSE3 )
......@@ -1028,6 +1032,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
{
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
......@@ -1096,13 +1101,20 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon;
}
#endif // ARCH_AARCH64
#endif // !HIGH_BIT_DEPTH
#if !HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
pf_interlaced->interleave_8x8_cavlc =
pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_altivec;
}
#endif // HAVE_ALTIVEC
#if HAVE_MSA
if( cpu&X264_CPU_MSA )
{
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_msa;
}
#endif
#endif
#endif // !HIGH_BIT_DEPTH
}
......@@ -36,22 +36,22 @@ typedef struct
// pix1 stride = FENC_STRIDE
// pix2 stride = FDEC_STRIDE
// p_dst stride = FDEC_STRIDE
void (*sub4x4_dct) ( dctcoef dct[16], pixel *pix1, pixel *pix2 );
void (*add4x4_idct) ( pixel *p_dst, dctcoef dct[16] );
void (*sub4x4_dct) ( dctcoef dct[16], pixel *pix1, pixel *pix2 );
void (*add4x4_idct)( pixel *p_dst, dctcoef dct[16] );
void (*sub8x8_dct) ( dctcoef dct[4][16], pixel *pix1, pixel *pix2 );
void (*sub8x8_dct_dc)( dctcoef dct[4], pixel *pix1, pixel *pix2 );
void (*add8x8_idct) ( pixel *p_dst, dctcoef dct[4][16] );
void (*add8x8_idct_dc) ( pixel *p_dst, dctcoef dct[4] );
void (*sub8x8_dct) ( dctcoef dct[4][16], pixel *pix1, pixel *pix2 );
void (*sub8x8_dct_dc) ( dctcoef dct[4], pixel *pix1, pixel *pix2 );
void (*add8x8_idct) ( pixel *p_dst, dctcoef dct[4][16] );
void (*add8x8_idct_dc)( pixel *p_dst, dctcoef dct[4] );
void (*sub8x16_dct_dc)( dctcoef dct[8], pixel *pix1, pixel *pix2 );
void (*sub16x16_dct) ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 );
void (*add16x16_idct)( pixel *p_dst, dctcoef dct[16][16] );
void (*add16x16_idct_dc) ( pixel *p_dst, dctcoef dct[16] );
void (*sub16x16_dct) ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 );
void (*add16x16_idct) ( pixel *p_dst, dctcoef dct[16][16] );
void (*add16x16_idct_dc)( pixel *p_dst, dctcoef dct[16] );
void (*sub8x8_dct8) ( dctcoef dct[64], pixel *pix1, pixel *pix2 );
void (*add8x8_idct8) ( pixel *p_dst, dctcoef dct[64] );
void (*sub8x8_dct8) ( dctcoef dct[64], pixel *pix1, pixel *pix2 );
void (*add8x8_idct8)( pixel *p_dst, dctcoef dct[64] );
void (*sub16x16_dct8) ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 );
void (*add16x16_idct8)( pixel *p_dst, dctcoef dct[4][64] );
......
......@@ -442,7 +442,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
h->loopf.deblock_chroma##intra[dir] );\
}\
} while(0)
} while( 0 )
if( h->mb.i_neighbour & MB_LEFT )
{
......@@ -638,7 +638,7 @@ void x264_macroblock_deblock( x264_t *h )
FDEC_STRIDE, bs[dir][edge], qpc, a, b, 0,\
h->loopf.deblock_luma[dir] );\
}\
} while(0)
} while( 0 )
if( !transform_8x8 ) FILTER( 0, 1 );
FILTER( 0, 2 );
......
......@@ -357,7 +357,7 @@ static int get_plane_ptr( x264_t *h, x264_picture_t *src, uint8_t **pix, int *st
return 0;
}
#define get_plane_ptr(...) do{ if( get_plane_ptr(__VA_ARGS__) < 0 ) return -1; }while(0)
#define get_plane_ptr(...) do { if( get_plane_ptr(__VA_ARGS__) < 0 ) return -1; } while( 0 )
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
......
......@@ -211,9 +211,9 @@ typedef struct
x264_deblock_intra_t deblock_chroma_intra_mbaff;
x264_deblock_intra_t deblock_chroma_420_intra_mbaff;
x264_deblock_intra_t deblock_chroma_422_intra_mbaff;
void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
int bframe );
void (*deblock_strength)( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
int bframe );
} x264_deblock_function_t;
void x264_frame_delete( x264_frame_t *frame );
......
......@@ -1183,7 +1183,7 @@ static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int m
M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
}
if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0) )
if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >= 0) )
{
CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[LBOT]][left_index_table->intra[2]] );
CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[LBOT]][left_index_table->intra[3]] );
......
......@@ -108,10 +108,10 @@ static const uint8_t x264_mb_type_list_table[X264_MBTYPE_MAX][2][2] =
{{0,0},{0,0}} /* B_SKIP */
};
#define IS_SUB4x4(type) ( (type ==D_L0_4x4)||(type ==D_L1_4x4)||(type ==D_BI_4x4))
#define IS_SUB4x8(type) ( (type ==D_L0_4x8)||(type ==D_L1_4x8)||(type ==D_BI_4x8))
#define IS_SUB8x4(type) ( (type ==D_L0_8x4)||(type ==D_L1_8x4)||(type ==D_BI_8x4))
#define IS_SUB8x8(type) ( (type ==D_L0_8x8)||(type ==D_L1_8x8)||(type ==D_BI_8x8)||(type ==D_DIRECT_8x8))
#define IS_SUB4x4(type) ( (type == D_L0_4x4)||(type == D_L1_4x4)||(type == D_BI_4x4) )
#define IS_SUB4x8(type) ( (type == D_L0_4x8)||(type == D_L1_4x8)||(type == D_BI_4x8) )
#define IS_SUB8x4(type) ( (type == D_L0_8x4)||(type == D_L1_8x4)||(type == D_BI_8x4) )
#define IS_SUB8x8(type) ( (type == D_L0_8x8)||(type == D_L1_8x8)||(type == D_BI_8x8)||(type == D_DIRECT_8x8) )
enum mb_partition_e
{
/* sub partition type for P_8x8 and B_8x8 */
......
......@@ -353,6 +353,15 @@ static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
}
}
#if WORDS_BIGENDIAN
static ALWAYS_INLINE uint32_t v210_endian_fix32( uint32_t x )
{
return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24);
}
#else
#define v210_endian_fix32(x) (x)
#endif
void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
pixel *dstc, intptr_t i_dstc,
uint32_t *src, intptr_t i_src, int w, int h )
......@@ -365,14 +374,14 @@ void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
for( int n = 0; n < w; n += 3 )
{
*(dstc0++) = *src0 & 0x03FF;
*(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
*(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
src0++;
*(dsty0++) = *src0 & 0x03FF;
*(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
*(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
src0++;
uint32_t s = v210_endian_fix32( *src0++ );
*dstc0++ = s & 0x03FF;
*dsty0++ = (s >> 10) & 0x03FF;
*dstc0++ = (s >> 20) & 0x03FF;
s = v210_endian_fix32( *src0++ );
*dsty0++ = s & 0x03FF;
*dstc0++ = (s >> 10) & 0x03FF;
*dsty0++ = (s >> 20) & 0x03FF;
}
dsty += i_dsty;
......
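Each 32-bit v210 word packs three 10-bit components into bits 0-9, 10-19 and 20-29, and the new v210_endian_fix32() byte-swaps the word on big-endian hosts before the shifts run. A self-contained little-endian check (values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define v210_endian_fix32(x) (x)   /* no-op on little endian, as in the patch */

int main( void )
{
    uint32_t s = v210_endian_fix32( 300u | (512u << 10) | (700u << 20) );
    printf( "%u %u %u\n", s & 0x3FF, (s >> 10) & 0x3FF, (s >> 20) & 0x3FF );
    return 0;   /* prints: 300 512 700 */
}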
......@@ -32,7 +32,7 @@ do\
{\
MC_CLIP_ADD((s)[0], (x)[0]);\
MC_CLIP_ADD((s)[1], (x)[1]);\
} while(0)
} while( 0 )
#define PROPAGATE_LIST(cpu)\
void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
......
......@@ -3430,7 +3430,7 @@ uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
x264_mc_weight_w8_msa( p_dst, *p_dst_stride,
p_dst, *p_dst_stride,
pWeight, i_h4w );
for( i_cnt = i_h4w; i_cnt < i_height ; i_cnt++ )
for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
{
uint64_t temp0;
v16i8 zero = {0};
......@@ -3666,7 +3666,7 @@ uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
pWeight, i_h4w );
p_src1 = src1_org + i_h4w * i_src_stride;
for( i_cnt = i_h4w; i_cnt < i_height ; i_cnt++ )
for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
{
uint64_t u_temp0;
v16i8 zero = {0};
......
......@@ -65,7 +65,7 @@ static uint32_t sad_4width_msa( uint8_t *p_src, int32_t i_src_stride,
v16u8 diff;
v8u16 sad = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LW4( p_src, i_src_stride, u_src0, u_src1, u_src2, u_src3 );
p_src += ( 4 * i_src_stride );
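For orientation, the MSA loops in this file vectorize a plain sum of absolute differences; a scalar reference of the computation (a sketch, not x264's exact C code):

#include <stdint.h>
#include <stdlib.h>

static uint32_t sad_wxh( const uint8_t *src, intptr_t i_src,
                         const uint8_t *ref, intptr_t i_ref, int w, int h )
{
    uint32_t sad = 0;
    for( int y = 0; y < h; y++, src += i_src, ref += i_ref )
        for( int x = 0; x < w; x++ )
            sad += abs( src[x] - ref[x] );
    return sad;
}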
......@@ -90,7 +90,7 @@ static uint32_t sad_8width_msa( uint8_t *p_src, int32_t i_src_stride,
v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
v8u16 sad = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LD_UB4( p_src, i_src_stride, src0, src1, src2, src3 );
p_src += ( 4 * i_src_stride );
......@@ -113,7 +113,7 @@ static uint32_t sad_16width_msa( uint8_t *p_src, int32_t i_src_stride,
v16u8 src0, src1, ref0, ref1;
v8u16 sad = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LD_UB2( p_src, i_src_stride, src0, src1 );
p_src += ( 2 * i_src_stride );
......@@ -147,7 +147,7 @@ static void sad_4width_x3d_msa( uint8_t *p_src, int32_t i_src_stride,
v8u16 sad1 = { 0 };
v8u16 sad2 = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LW4( p_src, i_src_stride, src0, src1, src2, src3 );
INSERT_W4_UB( src0, src1, src2, src3, src );
......@@ -192,7 +192,7 @@ static void sad_8width_x3d_msa( uint8_t *p_src, int32_t i_src_stride,
v8u16 sad1 = { 0 };
v8u16 sad2 = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LD_UB4( p_src, i_src_stride, src0, src1, src2, src3 );
p_src += ( 4 * i_src_stride );
......@@ -233,7 +233,7 @@ static void sad_16width_x3d_msa( uint8_t *p_src, int32_t i_src_stride,
v8u16 sad1 = { 0 };
v8u16 sad2 = { 0 };
for ( i_ht_cnt = ( i_height >> 1 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 1 ); i_ht_cnt--; )
{
src = LD_UB( p_src );
p_src += i_src_stride;
......@@ -298,7 +298,7 @@ static void sad_4width_x4d_msa( uint8_t *p_src, int32_t i_src_stride,
p_ref2 = p_aref[2];
p_ref3 = p_aref[3];
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LW4( p_src, i_src_stride, src0, src1, src2, src3 );
INSERT_W4_UB( src0, src1, src2, src3, src );
......@@ -358,7 +358,7 @@ static void sad_8width_x4d_msa( uint8_t *p_src, int32_t i_src_stride,
p_ref2 = p_aref[2];
p_ref3 = p_aref[3];
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LD_UB4( p_src, i_src_stride, src0, src1, src2, src3 );
p_src += ( 4 * i_src_stride );
......@@ -408,7 +408,7 @@ static void sad_16width_x4d_msa( uint8_t *p_src, int32_t i_src_stride,
p_ref2 = p_aref[2];
p_ref3 = p_aref[3];
for ( i_ht_cnt = ( i_height >> 1 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 1 ); i_ht_cnt--; )
{
src = LD_UB( p_src );
p_src += i_src_stride;
......@@ -465,7 +465,7 @@ static uint64_t avc_pixel_var16width_msa( uint8_t *p_pix, int32_t i_stride,
v8u16 add, pix_r, pix_l;
v4u32 sqr = { 0 };
for ( u_cnt = i_height; u_cnt--; )
for( u_cnt = i_height; u_cnt--; )
{
pix = LD_SB( p_pix );
p_pix += i_stride;
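The var loops accumulate the pixel sum and the sum of squares in a single pass. A scalar sketch, assuming x264's usual packing of the two accumulators into one 64-bit return value (sum in the low half, sum of squares in the high half):

#include <stdint.h>

static uint64_t pixel_var_wxh( const uint8_t *pix, intptr_t stride, int w, int h )
{
    uint32_t sum = 0, sqr = 0;
    for( int y = 0; y < h; y++, pix += stride )
        for( int x = 0; x < w; x++ )
        {
            sum += pix[x];
            sqr += pix[x] * pix[x];
        }
    return sum + ((uint64_t)sqr << 32);   /* assumed packing: sum | sqr<<32 */
}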
......@@ -489,7 +489,7 @@ static uint64_t avc_pixel_var8width_msa( uint8_t *p_pix, int32_t i_stride,
v8u16 add, pix_r;
v4u32 sqr = { 0 };
for ( u_cnt = i_height; u_cnt--; )
for( u_cnt = i_height; u_cnt--; )
{
pix = LD_SB( p_pix );
p_pix += i_stride;
......@@ -515,7 +515,7 @@ static uint32_t sse_diff_8width_msa( uint8_t *p_src, int32_t i_src_stride,
v8i16 avg = { 0 };
v4i32 vec, var = { 0 };
for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
for( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; )
{
LD_UB4( p_src, i_src_stride, src0, src1