Revision 74fd9022 libavcodec/i386/flacdsp_mmx.c
libavcodec/i386/flacdsp_mmx.c  

29  29 
x86_reg i = -n2*sizeof(int32_t);
30  30 
x86_reg j = n2*sizeof(int32_t); 
31  31 
asm volatile( 
32 
"movsd %0, %%xmm7 \n\t" 

33 
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"


34 
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"


35 
"movlhps %%xmm7, %%xmm7 \n\t" 

36 
"subpd %%xmm5, %%xmm7 \n\t" 

37 
"addsd %%xmm6, %%xmm7 \n\t" 

32 
"movsd %0, %%xmm7 \n\t"


33 
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"


34 
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"


35 
"movlhps %%xmm7, %%xmm7 \n\t"


36 
"subpd %%xmm5, %%xmm7 \n\t"


37 
"addsd %%xmm6, %%xmm7 \n\t"


38  38 
::"m"(c) 
39  39 
); 
40  40 
/*
 * WELCH(MOVPD, offset) -- expands to one inline-asm loop that converts
 * int32 samples to double, scales them by a per-sample weight and stores
 * the results.
 *
 *   MOVPD   string literal naming the store instruction used for the
 *           second (descending) output pair, e.g. an aligned or unaligned
 *           packed-double move.
 *   offset  element offset applied to the descending side; it is pasted
 *           into the template as offset*4 for the int32 load and offset*8
 *           for the double store.
 *
 * Names taken from the expansion site: i, j (byte offsets, read/write),
 * w_data, data, n2.
 *
 * Register contract (set up by the asm statement preceding this macro):
 *   xmm7  running pair of window positions, stepped by xmm5 each pass
 *   xmm6  constant pair loaded from ff_pd_1
 *   xmm5  constant pair loaded from ff_pd_2
 * The weight used is xmm6 - xmm7*xmm7 (presumably the 1 - x^2 Welch
 * window, given the macro name -- confirm against ff_pd_1).
 *
 * The loop writes w_data[] and reads data[] through pointers passed as
 * plain "r" inputs, so a "memory" clobber is declared to keep the compiler
 * from caching or reordering accesses to those arrays around the asm.
 *
 * NOTE(review): xmm0-xmm3 and xmm7 are modified but not listed as
 * clobbers, and xmm5-xmm7 carry state in from a separate asm statement;
 * this relies on the compiler not using those registers in between.
 */
#define WELCH(MOVPD, offset)\
    asm volatile(\
        "1: \n\t"\
        "movapd %%xmm7, %%xmm1 \n\t"                    /* xmm1 = position pair            */\
        "mulpd %%xmm1, %%xmm1 \n\t"                     /* xmm1 = position^2               */\
        "movapd %%xmm6, %%xmm0 \n\t"\
        "subpd %%xmm1, %%xmm0 \n\t"                     /* xmm0 = xmm6 - position^2        */\
        "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"             /* xmm1 = xmm0 with halves swapped */\
        "cvtpi2pd (%3,%0), %%xmm2 \n\t"                 /* two int32 at ascending offset   */\
        "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"      /* two int32 at descending offset  */\
        "mulpd %%xmm0, %%xmm2 \n\t"\
        "mulpd %%xmm1, %%xmm3 \n\t"                     /* mirrored side uses swapped pair */\
        "movapd %%xmm2, (%2,%0,2) \n\t"                 /* scale 2: doubles are 2x int32   */\
        MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
        "subpd %%xmm5, %%xmm7 \n\t"                     /* advance the window position     */\
        "sub $8, %1 \n\t"                               /* j -= 2 samples                  */\
        "add $8, %0 \n\t"                               /* i += 2 samples                  */\
        "jl 1b \n\t"                                    /* loop while i is still negative  */\
        :"+&r"(i), "+&r"(j)\
        :"r"(w_data+n2), "r"(data+n2)\
        :"memory"\
    );
...  ...  
85  85 
x86_reg i = -len*sizeof(double);
86  86 
if(j == lag-2) {
87  87 
asm volatile( 
88 
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"


89 
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"


90 
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"


91 
"1: \n\t" 

92 
"movapd (%4,%0), %%xmm3 \n\t" 

93 
"movupd 8(%5,%0), %%xmm4 \n\t" 

94 
"movapd (%5,%0), %%xmm5 \n\t" 

95 
"mulpd %%xmm3, %%xmm4 \n\t" 

96 
"mulpd %%xmm3, %%xmm5 \n\t" 

97 
"mulpd 16(%5,%0), %%xmm3 \n\t" 

98 
"addpd %%xmm4, %%xmm1 \n\t" 

99 
"addpd %%xmm5, %%xmm0 \n\t" 

100 
"addpd %%xmm3, %%xmm2 \n\t" 

101 
"add $16, %0 \n\t" 

102 
"jl 1b \n\t" 

103 
"movhlps %%xmm0, %%xmm3 \n\t" 

104 
"movhlps %%xmm1, %%xmm4 \n\t" 

105 
"movhlps %%xmm2, %%xmm5 \n\t" 

106 
"addsd %%xmm3, %%xmm0 \n\t" 

107 
"addsd %%xmm4, %%xmm1 \n\t" 

108 
"addsd %%xmm5, %%xmm2 \n\t" 

109 
"movsd %%xmm0, %1 \n\t" 

110 
"movsd %%xmm1, %2 \n\t" 

111 
"movsd %%xmm2, %3 \n\t" 

88 
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"


89 
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"


90 
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"


91 
"1: \n\t"


92 
"movapd (%4,%0), %%xmm3 \n\t"


93 
"movupd 8(%5,%0), %%xmm4 \n\t"


94 
"movapd (%5,%0), %%xmm5 \n\t"


95 
"mulpd %%xmm3, %%xmm4 \n\t"


96 
"mulpd %%xmm3, %%xmm5 \n\t"


97 
"mulpd 16(%5,%0), %%xmm3 \n\t"


98 
"addpd %%xmm4, %%xmm1 \n\t"


99 
"addpd %%xmm5, %%xmm0 \n\t"


100 
"addpd %%xmm3, %%xmm2 \n\t"


101 
"add $16, %0 \n\t"


102 
"jl 1b \n\t"


103 
"movhlps %%xmm0, %%xmm3 \n\t"


104 
"movhlps %%xmm1, %%xmm4 \n\t"


105 
"movhlps %%xmm2, %%xmm5 \n\t"


106 
"addsd %%xmm3, %%xmm0 \n\t"


107 
"addsd %%xmm4, %%xmm1 \n\t"


108 
"addsd %%xmm5, %%xmm2 \n\t"


109 
"movsd %%xmm0, %1 \n\t"


110 
"movsd %%xmm1, %2 \n\t"


111 
"movsd %%xmm2, %3 \n\t"


112  112 
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) 
113  113 
:"r"(data1+len), "r"(data1+len-j)
114  114 
); 
115  115 
} else { 
116  116 
asm volatile( 
117 
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"


118 
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"


119 
"1: \n\t" 

120 
"movapd (%3,%0), %%xmm3 \n\t" 

121 
"movupd 8(%4,%0), %%xmm4 \n\t" 

122 
"mulpd %%xmm3, %%xmm4 \n\t" 

123 
"mulpd (%4,%0), %%xmm3 \n\t" 

124 
"addpd %%xmm4, %%xmm1 \n\t" 

125 
"addpd %%xmm3, %%xmm0 \n\t" 

126 
"add $16, %0 \n\t" 

127 
"jl 1b \n\t" 

128 
"movhlps %%xmm0, %%xmm3 \n\t" 

129 
"movhlps %%xmm1, %%xmm4 \n\t" 

130 
"addsd %%xmm3, %%xmm0 \n\t" 

131 
"addsd %%xmm4, %%xmm1 \n\t" 

132 
"movsd %%xmm0, %1 \n\t" 

133 
"movsd %%xmm1, %2 \n\t" 

117 
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"


118 
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"


119 
"1: \n\t"


120 
"movapd (%3,%0), %%xmm3 \n\t"


121 
"movupd 8(%4,%0), %%xmm4 \n\t"


122 
"mulpd %%xmm3, %%xmm4 \n\t"


123 
"mulpd (%4,%0), %%xmm3 \n\t"


124 
"addpd %%xmm4, %%xmm1 \n\t"


125 
"addpd %%xmm3, %%xmm0 \n\t"


126 
"add $16, %0 \n\t"


127 
"jl 1b \n\t"


128 
"movhlps %%xmm0, %%xmm3 \n\t"


129 
"movhlps %%xmm1, %%xmm4 \n\t"


130 
"addsd %%xmm3, %%xmm0 \n\t"


131 
"addsd %%xmm4, %%xmm1 \n\t"


132 
"movsd %%xmm0, %1 \n\t"


133 
"movsd %%xmm1, %2 \n\t"


134  134 
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) 
135  135 
:"r"(data1+len), "r"(data1+len-j)
136  136 
); 
Also available in: Unified diff