A modern Music Player Daemon based on the Rockbox open-source, high-quality audio player
libadwaita
audio
rust
zig
deno
mpris
rockbox
mpd
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2009 by Jens Arnold
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
21
22#include "config.h"
23
24 .syntax unified
25
26 .global mpeg2_idct_copy
27 .type mpeg2_idct_copy, %function
28 .global mpeg2_idct_add
29 .type mpeg2_idct_add, %function
30
31/* Custom calling convention:
32 * r0 contains block pointer and is non-volatile
33 * all non-volatile c context saved and restored on its behalf
34 */
35.idct:
36 str lr, [sp, #-4]! @ lr is used
37 add r1, r0, #128 @ secondary, transposed temp buffer
38 mov r14, #8 @ loop counter
39
40.row_loop:
41 ldmia r0!, {r2, r3, r10, r11} @ fetch f0, f2, f4, f6, f1, f3, f5, f7
42 ldrd r4, L_W1357 @ load W1, W3, W5, W7
43
44 smuad r6, r4, r10 @ b0 = W1 * f1 + W3 * f3
45 smultt r7, r5, r10 @ -b1 = W7 * f3
46 smulbt r8, r4, r10 @ -b2 = W1 * f3
47
48 smusdx r9, r10, r5 @ b3 = f1 * W7 - f3 * W5
49 smlabb r7, r4, r11, r7 @ -b1 += W1 * f5
50 rsb r8, r8, #0 @ b2 = -b2
51 smlabb r8, r5, r10, r8 @ b2 += W5 * f1
52
53 smlad r6, r5, r11, r6 @ b0 += W5 * f5 + W7 * f7
54 smlabt r7, r5, r11, r7 @ -b1 += W5 * f7
55 smlatb r8, r5, r11, r8 @ b2 += W7 * f5
56
57 smlsdx r9, r11, r4, r9 @ b3 += f5 * W3 - f7 * W1
58 rsb r7, r7, #0 @ b1 = -b1
59 smlatb r7, r4, r10, r7 @ b1 += W3 * f1
60 smlatt r8, r4, r11, r8 @ b2 += W3 * f7
61
62 ldrd r4, L_W0246 @ load W0, W2, W4, W6
63 add r2, r2, #1 @ f0 += 1
64
65 smulbb r10, r5, r3 @ a0' = W4 * f4
66 smultt r12, r5, r3 @ a3' = W6 * f6
67 smultt r3, r4, r3 @ -a2' = W2 * f6
68
69 rsb r11, r10, #0 @ a1' = -W4 * f4
70 smlabb r10, r4, r2, r10 @ a0' += W0 * f0
71 smlabb r11, r4, r2, r11 @ a1' += W0 * f0
72 smlatt r12, r4, r2, r12 @ a3' += W2 * f2
73 rsb r3, r3, #0 @ a2' = -a2'
74 smlatt r3, r5, r2, r3 @ a2' += W6 * f2
75
76 add r10, r10, r12 @ a0 = a0' + a3'
77 sub r12, r10, r12, lsl #1 @ a3 = a0 - 2 * a3'
78 add r11, r11, r3 @ a1 = a1' + a2'
79 sub r3, r11, r3, lsl #1 @ a2 = a1 - 2 * a2'
80
81 subs r14, r14, #1 @ decrease loop count
82
83 @ Special store order for making the column pass calculate columns in
84 @ the order 0-2-1-3-4-6-5-7, allowing for uxtab16 use in later stages.
85 sub r2, r10, r6 @ block[7] = (a0 - b0)
86 mov r2, r2, asr #12 @ >> 12
87 strh r2, [r1, #7*16]
88 sub r2, r11, r7 @ block[6] = (a1 - b1)
89 mov r2, r2, asr #12 @ >> 12
90 strh r2, [r1, #5*16]
91 sub r2, r3, r8 @ block[5] = (a2 - b2)
92 mov r2, r2, asr #12 @ >> 12
93 strh r2, [r1, #6*16]
94 sub r2, r12, r9 @ block[4] = (a3 - b3)
95 mov r2, r2, asr #12 @ >> 12
96 strh r2, [r1, #4*16]
97 add r2, r12, r9 @ block[3] = (a3 + b3)
98 mov r2, r2, asr #12 @ >> 12
99 strh r2, [r1, #3*16]
100 add r2, r3, r8 @ block[2] = (a2 + b2)
101 mov r2, r2, asr #12 @ >> 12
102 strh r2, [r1, #1*16]
103 add r2, r11, r7 @ block[1] = (a1 + b1)
104 mov r2, r2, asr #12 @ >> 12
105 strh r2, [r1, #2*16]
106 add r2, r10, r6 @ block[0] = (a0 + b0)
107 mov r2, r2, asr #12 @ >> 12
108 strh r2, [r1], #2 @ advance to next temp column
109
110 bne .row_loop
111 b .col_start
112
113 @placed here because of ldrd's offset limit
114L_W1357:
115 .short 2841
116 .short 2408
117 .short 1609
118 .short 565
119
120L_W0246:
121 .short 2048
122 .short 2676
123 .short 2048
124 .short 1108
125
126.col_start:
127 @ r0 now points to the temp buffer, where we need it.
128 sub r1, r1, #128+16 @ point r1 back to the input block
129 mov r14, #8 @ loop counter
130
131.col_loop:
132 ldmia r0!, {r2, r3, r10, r11} @ fetch f0, f2, f4, f6, f1, f3, f5, f7
133 ldrd r4, L_W1357 @ load W1, W3, W5, W7
134
135 smuad r6, r4, r10 @ b0 = W1 * f1 + W3 * f3
136 smultt r7, r5, r10 @ -b1 = W7 * f3
137 smulbt r8, r4, r10 @ -b2 = W1 * f3
138
139 smusdx r9, r10, r5 @ b3 = f1 * W7 - f3 * W5
140 smlabb r7, r4, r11, r7 @ -b1 += W1 * f5
141 rsb r8, r8, #0 @ b2 = -b2
142 smlabb r8, r5, r10, r8 @ b2 += W5 * f1
143
144 smlad r6, r5, r11, r6 @ b0 += W5 * f5 + W7 * f7
145 smlabt r7, r5, r11, r7 @ -b1 += W5 * f7
146 smlatb r8, r5, r11, r8 @ b2 += W7 * f5
147
148 smlsdx r9, r11, r4, r9 @ b3 += f5 * W3 - f7 * W1
149 rsb r7, r7, #0 @ b1 = -b1
150 smlatb r7, r4, r10, r7 @ b1 += W3 * f1
151 smlatt r8, r4, r11, r8 @ b2 += W3 * f7
152
153 ldrd r4, L_W0246 @ load W0, W2, W4, W6
154 add r2, r2, #32 @ DC offset: 0.5
155
156 smulbb r10, r5, r3 @ a0' = W4 * f4
157 smultt r12, r5, r3 @ a3' = W6 * f6
158 smultt r3, r4, r3 @ -a2' = W2 * f6
159
160 rsb r11, r10, #0 @ a1' = -W4 * f4
161 smlabb r10, r4, r2, r10 @ a0' += W0 * f0
162 smlabb r11, r4, r2, r11 @ a1' += W0 * f0
163 smlatt r12, r4, r2, r12 @ a3' += W2 * f2
164 rsb r3, r3, #0 @ a2' = -a2'
165 smlatt r3, r5, r2, r3 @ a2' += W6 * f2
166
167 add r10, r10, r12 @ a0 = a0' + a3'
168 sub r12, r10, r12, lsl #1 @ a3 = a0 - 2 * a3'
169 add r11, r11, r3 @ a1 = a1' + a2'
170 sub r3, r11, r3, lsl #1 @ a2 = a1 - 2 * a2'
171
172 subs r14, r14, #1 @ decrease loop count
173
174 sub r2, r10, r6 @ block[7] = (a0 - b0)
175 mov r2, r2, asr #17 @ >> 17
176 strh r2, [r1, #7*16]
177 sub r2, r11, r7 @ block[6] = (a1 - b1)
178 mov r2, r2, asr #17 @ >> 17
179 strh r2, [r1, #6*16]
180 sub r2, r3, r8 @ block[5] = (a2 - b2)
181 mov r2, r2, asr #17 @ >> 17
182 strh r2, [r1, #5*16]
183 sub r2, r12, r9 @ block[4] = (a3 - b3)
184 mov r2, r2, asr #17 @ >> 17
185 strh r2, [r1, #4*16]
186 add r2, r12, r9 @ block[3] = (a3 + b3)
187 mov r2, r2, asr #17 @ >> 17
188 strh r2, [r1, #3*16]
189 add r2, r3, r8 @ block[2] = (a2 + b2)
190 mov r2, r2, asr #17 @ >> 17
191 strh r2, [r1, #2*16]
192 add r2, r11, r7 @ block[1] = (a1 + b1)
193 mov r2, r2, asr #17 @ >> 17
194 strh r2, [r1, #1*16]
195 add r2, r10, r6 @ block[0] = (a0 + b0)
196 mov r2, r2, asr #17 @ >> 17
197 strh r2, [r1], #2 @ advance to next column
198
199 bne .col_loop
200
201 sub r0, r0, #256 @ point r0 back to the input block
202 ldr pc, [sp], #4
203
204
205mpeg2_idct_copy:
206 stmfd sp!, {r1-r2, r4-r11, lr}
207 bl .idct
208 ldmfd sp!, {r1-r2}
209
210 add r3, r0, #128
211 mov r8, #0
212 mov r9, #0
213 mov r10, #0
214 mov r11, #0
2151: @ idct data is in order 0-2-1-3-4-6-5-7,
216 ldmia r0, {r4-r7} @ see above
217 stmia r0!, {r8-r11}
218 usat16 r4, #8, r4
219 usat16 r5, #8, r5
220 orr r4, r4, r5, lsl #8
221 usat16 r6, #8, r6
222 usat16 r7, #8, r7
223 orr r5, r6, r7, lsl #8
224 strd r4, [r1] @ r4, r5
225 add r1, r1, r2
226 cmp r0, r3
227 blo 1b
228
229 ldmfd sp!, {r4-r11, pc}
230
231mpeg2_idct_add:
232 cmp r0, #129
233 mov r0, r1
234 ldrsheq r1, [r0, #0]
235 bne 1f
236 and r1, r1, #0x70
237 cmp r1, #0x40
238 bne 3f
2391:
240 stmfd sp!, {r2-r11, lr}
241 bl .idct
242 ldmfd sp!, {r1-r2}
243
244 add r3, r0, #128
245 mov r10, #0
246 mov r11, #0
247 mov r12, #0
248 mov lr, #0
249 ldrd r8, [r1] @ r8, r9
2502: @ idct data is in order 0-2-1-3-4-6-5-7,
251 ldmia r0, {r4-r7} @ see above
252 stmia r0!, {r10-r12, lr}
253 uxtab16 r4, r4, r8
254 uxtab16 r5, r5, r8, ror #8
255 usat16 r4, #8, r4
256 usat16 r5, #8, r5
257 orr r4, r4, r5, lsl #8
258 uxtab16 r6, r6, r9
259 uxtab16 r7, r7, r9, ror #8
260 usat16 r6, #8, r6
261 usat16 r7, #8, r7
262 orr r5, r6, r7, lsl #8
263 strd r4, [r1] @ r4, r5
264 add r1, r1, r2
265 cmp r0, r3
266 ldrdlo r8, [r1] @ r8, r9
267 blo 2b
268
269 ldmfd sp!, {r4-r11, pc}
270
2713:
272 stmfd sp!, {r4, lr}
273 ldrsh r4, [r0, #0] @ r4 = block[0]
274 mov r12, #0
275 strh r12, [r0, #0] @ block[0] = 0
276 strh r12, [r0, #126] @ block[63] = 0
277 add r4, r4, #64
278 mov r4, r4, asr #7 @ r4 = DC
279 mov r4, r4, lsl #16 @ spread to 2 halfwords
280 orr r4, r4, r4, lsr #16
281 ldrd r0, [r2] @ r0, r1
282 add r12, r2, r3, asl #3
2834:
284 uxtab16 lr, r4, r0, ror #8
285 uxtab16 r0, r4, r0
286 usat16 lr, #8, lr
287 usat16 r0, #8, r0
288 orr r0, r0, lr, lsl #8
289 uxtab16 lr, r4, r1, ror #8
290 uxtab16 r1, r4, r1
291 usat16 lr, #8, lr
292 usat16 r1, #8, r1
293 orr r1, r1, lr, lsl #8
294 strd r0, [r2] @ r0, r1
295 add r2, r2, r3
296 cmp r2, r12
297 ldrdlo r0, [r2] @ r0, r1
298 blo 4b
299
300 ldmfd sp!, {r4, pc}