A modern Music Player Daemon based on Rockbox, the open-source, high-quality audio player
libadwaita audio rust zig deno mpris rockbox mpd
at master 300 lines 10 kB view raw
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2009 by Jens Arnold
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include "config.h"

    .syntax unified

    .global     mpeg2_idct_copy
    .type       mpeg2_idct_copy, %function
    .global     mpeg2_idct_add
    .type       mpeg2_idct_add, %function

/* .idct - 8x8 inverse DCT core shared by mpeg2_idct_copy and mpeg2_idct_add.
 *
 * Custom calling convention:
 *   r0 contains the block pointer and is non-volatile (restored on exit).
 *   All non-volatile C context is saved and restored on its behalf by the
 *   callers; this routine itself only saves lr.
 *
 * In:     r0 = block, 64 halfword coefficients (128 bytes). Each group of
 *              eight halfwords is read with ldmia as f0, f2, f4, f6, f1, f3,
 *              f5, f7, so the block must be word aligned.
 * Out:    r0 = block (unchanged). The block holds the transformed samples;
 *              within each output row the columns are stored in the order
 *              0-2-1-3-4-6-5-7.
 * Memory: the 128 bytes following the block (block + 128 .. block + 255)
 *         are overwritten as a transposed temp buffer by the row pass.
 * Clobb:  r1-r12, r14, flags.
 *
 * The coefficient tables W0..W7 match round(2048 * sqrt(2) * cos(k * pi / 16))
 * for k = 1..7; W0 is 2048 like W4.
 */
.idct:
    str     lr, [sp, #-4]!          @ lr is used
    add     r1, r0, #128            @ secondary, transposed temp buffer
    mov     r14, #8                 @ loop counter

.row_loop:
    ldmia   r0!, {r2, r3, r10, r11} @ fetch f0, f2, f4, f6, f1, f3, f5, f7
    ldrd    r4, L_W1357             @ load  W1, W3, W5, W7

    smuad   r6, r4, r10             @ b0 = W1 * f1 + W3 * f3
    smultt  r7, r5, r10             @ -b1 = W7 * f3
    smulbt  r8, r4, r10             @ -b2 = W1 * f3

    smusdx  r9, r10, r5             @ b3 = f1 * W7 - f3 * W5
    smlabb  r7, r4, r11, r7         @ -b1 += W1 * f5
    rsb     r8, r8, #0              @ b2 = -b2
    smlabb  r8, r5, r10, r8         @ b2 += W5 * f1

    smlad   r6, r5, r11, r6         @ b0 += W5 * f5 + W7 * f7
    smlabt  r7, r5, r11, r7         @ -b1 += W5 * f7
    smlatb  r8, r5, r11, r8         @ b2 += W7 * f5

    smlsdx  r9, r11, r4, r9         @ b3 += f5 * W3 - f7 * W1
    rsb     r7, r7, #0              @ b1 = -b1
    smlatb  r7, r4, r10, r7         @ b1 += W3 * f1
    smlatt  r8, r4, r11, r8         @ b2 += W3 * f7

    ldrd    r4, L_W0246             @ load  W0, W2, W4, W6

    @ The rounding term is added to the 32-bit product W0 * f0 rather than
    @ to f0 inside the packed word r2 = f2:f0, where a carry out of the low
    @ halfword would spill into f2.
    smulbb  r11, r4, r2             @ t   = W0 * f0
    smulbb  r10, r5, r3             @ a0' = W4 * f4
    smultt  r12, r5, r3             @ a3' = W6 * f6
    smultt  r3, r4, r3              @ -a2' = W2 * f6

    add     r11, r11, #2048         @ t  += 0.5 (in >> 12 scale)
    smlatt  r12, r4, r2, r12        @ a3' += W2 * f2
    add     r10, r10, r11           @ a0' += t
    rsb     r3, r3, #0              @ a2' = -a2'
    rsb     r11, r10, r11, lsl #1   @ a1' = 2 * t - a0' = t - W4 * f4
    smlatt  r3, r5, r2, r3          @ a2' += W6 * f2

    add     r10, r10, r12           @ a0  = a0' + a3'
    sub     r12, r10, r12, lsl #1   @ a3  = a0 - 2 * a3'
    add     r11, r11, r3            @ a1  = a1' + a2'
    sub     r3, r11, r3, lsl #1     @ a2  = a1 - 2 * a2'

    subs    r14, r14, #1            @ decrease loop count; nothing below
                                    @ touches the flags until the bne

    @ Special store order for making the column pass calculate columns in
    @ the order 0-2-1-3-4-6-5-7, allowing for uxtab16 use in later stages.
    sub     r2, r10, r6             @ block[7] = (a0 - b0)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #7*16]
    sub     r2, r11, r7             @ block[6] = (a1 - b1)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #5*16]
    sub     r2, r3, r8              @ block[5] = (a2 - b2)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #6*16]
    sub     r2, r12, r9             @ block[4] = (a3 - b3)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #4*16]
    add     r2, r12, r9             @ block[3] = (a3 + b3)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #3*16]
    add     r2, r3, r8              @ block[2] = (a2 + b2)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #1*16]
    add     r2, r11, r7             @ block[1] = (a1 + b1)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1, #2*16]
    add     r2, r10, r6             @ block[0] = (a0 + b0)
    mov     r2, r2, asr #12         @ >> 12
    strh    r2, [r1], #2            @ advance to next temp column

    bne     .row_loop
    b       .col_start

    @ placed here because of ldrd's offset limit
    @ Doubleword aligned so the ldrd loads do not depend on how the core is
    @ configured for unaligned accesses; the padding is never executed
    @ because of the unconditional branch above.
    .balign 8
L_W1357:
    .short  2841
    .short  2408
    .short  1609
    .short   565

L_W0246:
    .short  2048
    .short  2676
    .short  2048
    .short  1108

.col_start:
    @ r0 now points to the temp buffer, where we need it.
    sub     r1, r1, #128+16         @ point r1 back to the input block
    mov     r14, #8                 @ loop counter

.col_loop:
    ldmia   r0!, {r2, r3, r10, r11} @ fetch f0, f2, f4, f6, f1, f3, f5, f7
    ldrd    r4, L_W1357             @ load  W1, W3, W5, W7

    smuad   r6, r4, r10             @ b0 = W1 * f1 + W3 * f3
    smultt  r7, r5, r10             @ -b1 = W7 * f3
    smulbt  r8, r4, r10             @ -b2 = W1 * f3

    smusdx  r9, r10, r5             @ b3 = f1 * W7 - f3 * W5
    smlabb  r7, r4, r11, r7         @ -b1 += W1 * f5
    rsb     r8, r8, #0              @ b2 = -b2
    smlabb  r8, r5, r10, r8         @ b2 += W5 * f1

    smlad   r6, r5, r11, r6         @ b0 += W5 * f5 + W7 * f7
    smlabt  r7, r5, r11, r7         @ -b1 += W5 * f7
    smlatb  r8, r5, r11, r8         @ b2 += W7 * f5

    smlsdx  r9, r11, r4, r9         @ b3 += f5 * W3 - f7 * W1
    rsb     r7, r7, #0              @ b1 = -b1
    smlatb  r7, r4, r10, r7         @ b1 += W3 * f1
    smlatt  r8, r4, r11, r8         @ b2 += W3 * f7

    ldrd    r4, L_W0246             @ load  W0, W2, W4, W6

    @ As in the row pass, the DC offset goes onto the 32-bit product so it
    @ cannot carry from f0 into f2 inside the packed word r2.
    smulbb  r11, r4, r2             @ t   = W0 * f0
    smulbb  r10, r5, r3             @ a0' = W4 * f4
    smultt  r12, r5, r3             @ a3' = W6 * f6
    smultt  r3, r4, r3              @ -a2' = W2 * f6

    add     r11, r11, #65536        @ DC offset: 0.5 (in >> 17 scale)
    smlatt  r12, r4, r2, r12        @ a3' += W2 * f2
    add     r10, r10, r11           @ a0' += t
    rsb     r3, r3, #0              @ a2' = -a2'
    rsb     r11, r10, r11, lsl #1   @ a1' = 2 * t - a0' = t - W4 * f4
    smlatt  r3, r5, r2, r3          @ a2' += W6 * f2

    add     r10, r10, r12           @ a0  = a0' + a3'
    sub     r12, r10, r12, lsl #1   @ a3  = a0 - 2 * a3'
    add     r11, r11, r3            @ a1  = a1' + a2'
    sub     r3, r11, r3, lsl #1     @ a2  = a1 - 2 * a2'

    subs    r14, r14, #1            @ decrease loop count; nothing below
                                    @ touches the flags until the bne

    sub     r2, r10, r6             @ block[7] = (a0 - b0)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #7*16]
    sub     r2, r11, r7             @ block[6] = (a1 - b1)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #6*16]
    sub     r2, r3, r8              @ block[5] = (a2 - b2)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #5*16]
    sub     r2, r12, r9             @ block[4] = (a3 - b3)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #4*16]
    add     r2, r12, r9             @ block[3] = (a3 + b3)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #3*16]
    add     r2, r3, r8              @ block[2] = (a2 + b2)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #2*16]
    add     r2, r11, r7             @ block[1] = (a1 + b1)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1, #1*16]
    add     r2, r10, r6             @ block[0] = (a0 + b0)
    mov     r2, r2, asr #17         @ >> 17
    strh    r2, [r1], #2            @ advance to next column

    bne     .col_loop

    sub     r0, r0, #256            @ point r0 back to the input block
    ldr     pc, [sp], #4


/* mpeg2_idct_copy - inverse transform a block and store it as 8x8 bytes.
 *
 * In:     r0 = block (see .idct for layout, alignment and scratch use)
 *         r1 = destination; 8 bytes are written per row with strd
 *         r2 = byte distance between destination rows
 *         NOTE(review): presumably called from C as
 *         (int16_t *block, uint8_t *dest, int stride) - confirm against the
 *         decoder's prototype. strd presumably needs a suitably aligned
 *         destination - confirm for the target core.
 * Effect: each sample is saturated to 0..255 and stored; the 128-byte block
 *         is cleared to zero while it is read.
 * Clobb:  r0-r3, r12, flags; r4-r11 are saved and restored.
 */
mpeg2_idct_copy:
    stmfd  sp!, {r1-r2, r4-r11, lr}
    bl     .idct
    ldmfd  sp!, {r1-r2}             @ r1 = destination, r2 = row distance

    add    r3, r0, #128             @ end of block
    mov    r8, #0                   @ r8-r11: zeros for clearing the block
    mov    r9, #0
    mov    r10, #0
    mov    r11, #0
1:                                  @ idct data is in order 0-2-1-3-4-6-5-7,
    ldmia  r0,  {r4-r7}             @ see above
    stmia  r0!, {r8-r11}            @ clear the row just fetched
    usat16 r4, #8, r4               @ clamp samples 0, 2 to 0..255
    usat16 r5, #8, r5               @ clamp samples 1, 3
    orr    r4, r4, r5, lsl #8       @ interleave into bytes 0-1-2-3
    usat16 r6, #8, r6               @ clamp samples 4, 6
    usat16 r7, #8, r7               @ clamp samples 5, 7
    orr    r5, r6, r7, lsl #8       @ interleave into bytes 4-5-6-7
    strd   r4, [r1]                 @ r4, r5
    add    r1, r1, r2
    cmp    r0, r3
    blo    1b

    ldmfd  sp!, {r4-r11, pc}
    .size  mpeg2_idct_copy, .-mpeg2_idct_copy

/* mpeg2_idct_add - inverse transform a block and add it to 8x8 bytes.
 *
 * In:     r0 = value compared against 129 to select the DC-only shortcut
 *         r1 = block (see .idct for layout, alignment and scratch use)
 *         r2 = destination; 8 bytes per row are read with ldrd, updated and
 *              written back with strd
 *         r3 = byte distance between destination rows
 *         NOTE(review): presumably called from C as
 *         (int last, int16_t *block, uint8_t *dest, int stride) - confirm
 *         against the decoder's prototype.
 * Effect: the full transform is used unless r0 == 129 and
 *         (block[0] & 0x70) != 0x40. In that case only
 *         DC = (block[0] + 64) >> 7 is added to every destination byte and
 *         only block[0] and block[63] are cleared. Otherwise the whole block
 *         is cleared. Results are saturated to 0..255.
 *         NOTE(review): the DC-only loop ends on an unsigned pointer compare
 *         against dest + 8 * stride, so it assumes a positive stride.
 * Clobb:  r0-r3, r12, flags; r4-r11 are saved and restored.
 */
mpeg2_idct_add:
    cmp    r0, #129
    mov    r0, r1                   @ r0 = block
    ldrsheq r1, [r0, #0]            @ r1 = block[0], only if r0 was 129
    bne    1f
    and    r1, r1, #0x70
    cmp    r1, #0x40
    bne    3f                       @ DC-only shortcut
1:
    stmfd  sp!, {r2-r11, lr}
    bl     .idct
    ldmfd  sp!, {r1-r2}             @ r1 = destination, r2 = row distance

    add    r3, r0, #128             @ end of block
    mov    r10, #0                  @ r10-r12, lr: zeros for clearing the block
    mov    r11, #0
    mov    r12, #0
    mov    lr, #0
    ldrd   r8, [r1]                 @ r8, r9
2:                                  @ idct data is in order 0-2-1-3-4-6-5-7,
    ldmia  r0,  {r4-r7}             @ see above
    stmia  r0!, {r10-r12, lr}       @ clear the row just fetched
    uxtab16 r4, r4, r8              @ samples 0, 2 += destination bytes 0, 2
    uxtab16 r5, r5, r8, ror #8      @ samples 1, 3 += destination bytes 1, 3
    usat16 r4, #8, r4
    usat16 r5, #8, r5
    orr    r4, r4, r5, lsl #8       @ interleave into bytes 0-1-2-3
    uxtab16 r6, r6, r9              @ samples 4, 6 += destination bytes 4, 6
    uxtab16 r7, r7, r9, ror #8      @ samples 5, 7 += destination bytes 5, 7
    usat16 r6, #8, r6
    usat16 r7, #8, r7
    orr    r5, r6, r7, lsl #8       @ interleave into bytes 4-5-6-7
    strd   r4, [r1]                 @ r4, r5
    add    r1, r1, r2
    cmp    r0, r3
    ldrdlo r8, [r1]                 @ r8, r9; next row only while looping
    blo    2b

    ldmfd  sp!, {r4-r11, pc}

3:
    stmfd  sp!, {r4, lr}
    ldrsh  r4, [r0, #0]             @ r4 = block[0]
    mov    r12, #0
    strh   r12, [r0, #0]            @ block[0] = 0
    strh   r12, [r0, #126]          @ block[63] = 0
    add    r4, r4, #64
    mov    r4, r4, asr #7           @ r4 = DC
    mov    r4, r4, lsl #16          @ spread to 2 halfwords
    orr    r4, r4, r4, lsr #16
    ldrd   r0, [r2]                 @ r0, r1
    add    r12, r2, r3, asl #3      @ end pointer = destination + 8 rows
4:
    uxtab16 lr, r4, r0, ror #8      @ DC + destination bytes 1, 3
    uxtab16 r0, r4, r0              @ DC + destination bytes 0, 2
    usat16 lr, #8, lr
    usat16 r0, #8, r0
    orr    r0, r0, lr, lsl #8       @ interleave into bytes 0-1-2-3
    uxtab16 lr, r4, r1, ror #8      @ DC + destination bytes 5, 7
    uxtab16 r1, r4, r1              @ DC + destination bytes 4, 6
    usat16 lr, #8, lr
    usat16 r1, #8, r1
    orr    r1, r1, lr, lsl #8       @ interleave into bytes 4-5-6-7
    strd   r0, [r2]                 @ r0, r1
    add    r2, r2, r3
    cmp    r2, r12
    ldrdlo r0, [r2]                 @ r0, r1; next row only while looping
    blo    4b

    ldmfd  sp!, {r4, pc}
    .size  mpeg2_idct_add, .-mpeg2_idct_add