A modern Music Player Daemon based on Rockbox, the open-source, high-quality audio player
libadwaita audio rust zig deno mpris rockbox mpd
at master 575 lines 18 kB view raw
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2007 Jens Arnold
 * Based on the work of Karim Boucher and Rani Hod
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

    .global     mpeg2_idct_copy
    .type       mpeg2_idct_copy, @function
    .global     mpeg2_idct_add
    .type       mpeg2_idct_add, @function

/* clip_u8 limit, val
 *
 * Saturate the signed 32 bit value in data register val to 0..255.
 * Only the low byte of val is meaningful afterwards.
 *   limit - data register holding the constant 255
 *   val   - data register holding the value to clip
 *
 * A single unsigned compare catches both out-of-range directions. When the
 * branch is not taken, N (from val - 255) is clear for values above 255 and
 * set for negative values, so spl stores 0xff or 0x00 respectively.
 */
    .macro  clip_u8 limit, val
    cmp.l   \limit, \val            | overflow?
    bls.b   1f
    spl.b   \val                    |   yes: set appropriate limit value in low byte
1:
    .endm

/* idct_pass label, round, shift
 *
 * One complete 1-D pass (8 iterations) of the IDCT.
 *   label - name of the loop label to emit
 *   round - value added to f0 before multiplying; with W0 = 2048 this adds
 *           round * 2048 to every even-part sum (1 -> half of 1 << 12,
 *           32 -> half of 1 << 17)
 *   shift - right shift applied to each result
 *
 * On entry:
 *   %a0-%a3 - packed coefficients W0..W7 (see .idct)
 *   %a6     - source pointer, advanced by 16 bytes per iteration
 *   %a4     - destination pointer; the 8 results of an iteration are written
 *             16 bytes apart and %a4 advances by 2 bytes per iteration, so
 *             the output is the transpose of the input
 *   %d3     - iteration count (8)
 * Clobbers %d0-%d7, %a5 and %acc0-%acc3 (left cleared).
 */
    .macro  idct_pass label, round, shift
\label:
    movem.l (%a6), %d0-%d2/%a5      | fetch (f0, f2, f4, f6, f1, f3, f5, f7)

    mac.w   %a0l, %d2u, %acc0       | %acc0 = W1 * f1
    mac.w   %a1l, %d2l, %acc0       |       + W3 * f3
    mac.w   %a2l, %a5u, %acc0       |       + W5 * f5
    mac.w   %a3l, %a5l, %acc0       |       + W7 * f7

    mac.w   %a1l, %d2u, %acc1       | %acc1 = W3 * f1
    msac.w  %a3l, %d2l, %acc1       |       - W7 * f3
    msac.w  %a0l, %a5u, %acc1       |       - W1 * f5
    msac.w  %a2l, %a5l, %acc1       |       - W5 * f7

    mac.w   %a2l, %d2u, %acc2       | %acc2 = W5 * f1
    msac.w  %a0l, %d2l, %acc2       |       - W1 * f3
    mac.w   %a3l, %a5u, %acc2       |       + W7 * f5
    mac.w   %a1l, %a5l, %acc2       |       + W3 * f7

    mac.w   %a3l, %d2u, %acc3       | %acc3 = W7 * f1
    msac.w  %a2l, %d2l, %acc3       |       - W5 * f3
    mac.w   %a1l, %a5u, %acc3       |       + W3 * f5
    msac.w  %a0l, %a5l, %acc3       |       - W1 * f7

    lea.l   (16,%a6), %a6           | Advance to next row; put here to fill EMAC latency
    add.l   #(\round << 16), %d0    | f0 += round (rounding offset, see above)

    movclr.l %acc0, %d4             | b0
    movclr.l %acc1, %d5             | b1
    movclr.l %acc2, %d6             | b2
    movclr.l %acc3, %d7             | b3

    mac.w   %a0u, %d0u, %acc0       | %acc0 = W0 * f0
    mac.w   %a2u, %d1u, %acc0       |       + W4 * f4
    move.l  %acc0, %acc3            | keep W0 * f0 + W4 * f4 for a3
    mac.w   %a1u, %d0l, %acc0       |       + W2 * f2
    mac.w   %a3u, %d1l, %acc0       |       + W6 * f6

    mac.w   %a0u, %d0u, %acc1       | %acc1 = W0 * f0
    msac.w  %a2u, %d1u, %acc1       |       - W4 * f4
    move.l  %acc1, %acc2            | keep W0 * f0 - W4 * f4 for a2
    mac.w   %a3u, %d0l, %acc1       |       + W6 * f2
    msac.w  %a1u, %d1l, %acc1       |       - W2 * f6

                                    | %acc2 = W0 * f0 - W4 * f4 (copied above)
    msac.w  %a3u, %d0l, %acc2       |       - W6 * f2
    mac.w   %a1u, %d1l, %acc2       |       + W2 * f6

                                    | %acc3 = W0 * f0 + W4 * f4 (copied above)
    msac.w  %a1u, %d0l, %acc3       |       - W2 * f2
    msac.w  %a3u, %d1l, %acc3       |       - W6 * f6

    moveq.l #\shift, %d1            | shift amount

    move.l  %acc0, %d0              | block[7] = (a0
    sub.l   %d4, %d0                |           - b0)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (7*16,%a4)

    move.l  %acc1, %d0              | block[6] = (a1
    sub.l   %d5, %d0                |           - b1)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (6*16,%a4)

    move.l  %acc2, %d0              | block[5] = (a2
    sub.l   %d6, %d0                |           - b2)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (5*16,%a4)

    move.l  %acc3, %d0              | block[4] = (a3
    sub.l   %d7, %d0                |           - b3)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (4*16,%a4)

    movclr.l %acc3, %d0             | block[3] = (a3
    add.l   %d7, %d0                |           + b3)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (3*16,%a4)

    movclr.l %acc2, %d0             | block[2] = (a2
    add.l   %d6, %d0                |           + b2)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (2*16,%a4)

    movclr.l %acc1, %d0             | block[1] = (a1
    add.l   %d5, %d0                |           + b1)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (1*16,%a4)

    movclr.l %acc0, %d0             | block[0] = (a0
    add.l   %d4, %d0                |           + b0)
    asr.l   %d1, %d0                |           >> shift
    move.w  %d0, (%a4)+             | advance to next output column

    subq.l  #1, %d3                 | loop 8 times
    bne.w   \label
    .endm

/* The IDCT itself.
 * Input: %a0: block pointer
 * Caller must save all registers.
 *
 * The row pass reads the block and writes its results, transposed, into a
 * temp buffer located 128 bytes after the block pointer; the column pass
 * reads that temp buffer and writes the final values back into the block.
 * The memory at %a0 must therefore provide 256 writable bytes.
 * %d0-%d7, %a0-%a6, %macsr and %acc0-%acc3 are overwritten.
 */
    .align  2
.idct:
    move.l  %a0, %a6                | %a6 = source pointer for the row pass

    move.l  #0, %macsr              | signed integer mode

    move.l  #((2048<<16)+2841), %a0 | W0, W1
    move.l  #((2676<<16)+2408), %a1 | W2, W3
    move.l  #((2048<<16)+1609), %a2 | W4, W5
    move.l  #((1108<<16)+ 565), %a3 | W6, W7

    lea.l   (128,%a6), %a4          | secondary, transposed temp buffer
    moveq.l #8, %d3                 | loop counter

    idct_pass .row_loop, 1, 12      | rows: block -> temp, >> 12

    | %a6 now points to the temp buffer, where we need it.
    lea.l   (-16-128,%a4), %a4      | point %a4 back to the input block
    moveq.l #8, %d3                 | loop counter

    idct_pass .col_loop, 32, 17     | columns: temp -> block, >> 17

    rts

/* mpeg2_idct_copy
 *
 * Runs the IDCT on a block, stores the results clipped to 0..255 as 8 rows
 * of 8 bytes at the destination, and clears the 64 block words it consumed.
 *
 * Stack arguments, in order:
 *   block pointer, destination pointer, stride (bytes between output rows)
 * Saves and restores %d2-%d7/%a2-%a6; %d0-%d1/%a0-%a1 are overwritten.
 */
    .align  2

mpeg2_idct_copy:
    lea.l   (-11*4,%sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)  | save some registers
    move.l  (11*4+4,%sp), %a0       | %a0 - block pointer for idct

    bsr.w   .idct                   | apply idct to block
    movem.l (11*4+4,%sp), %a0-%a2   | %a0 - block pointer
                                    | %a1 - destination pointer
                                    | %a2 - stride

    move.l  #255, %d1               | preload constant for clipping
    moveq.l #8, %d4                 | loop counter

.copy_clip_loop:
    move.w  (%a0), %d0              | load block[0]
    ext.l   %d0                     | sign extend
    clip_u8 %d1, %d0
    move.b  %d0, %d2                | collect output bytes 0..3 in %d2
    lsl.l   #8, %d2

    move.w  (2,%a0), %d0            | load block[1]
    ext.l   %d0                     | sign extend
    clip_u8 %d1, %d0
    move.b  %d0, %d2                | collect output bytes 0..3 in %d2
    lsl.l   #8, %d2
    clr.l   (%a0)+                  | clear block[0] and block[1],
                                    | %a0 now pointing to block[2]
    move.w  (%a0), %d0              | do b2 and b3
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d2
    lsl.l   #8, %d2

    move.w  (2,%a0), %d0
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d2
    clr.l   (%a0)+

    move.w  (%a0), %d0              | do b4 and b5
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d3                | collect output bytes 4..7 in %d3
    lsl.l   #8, %d3

    move.w  (2,%a0), %d0
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d3
    lsl.l   #8, %d3
    clr.l   (%a0)+

    move.w  (%a0), %d0              | do b6 and b7
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d3
    lsl.l   #8, %d3

    move.w  (2,%a0), %d0
    ext.l   %d0
    clip_u8 %d1, %d0
    move.b  %d0, %d3
    clr.l   (%a0)+

    movem.l %d2-%d3, (%a1)          | write all 8 output bytes at once
    add.l   %a2, %a1                | advance output pointer
    subq.l  #1, %d4                 | loop 8 times
    bne.w   .copy_clip_loop

    movem.l (%sp), %d2-%d7/%a2-%a6
    lea.l   (11*4,%sp), %sp
    rts
    .size   mpeg2_idct_copy, .-mpeg2_idct_copy

/* mpeg2_idct_add
 *
 * Adds a residual to the 8x8 bytes at the destination, clipping each sum to
 * 0..255.
 *
 * Stack arguments, in order:
 *   last value, block pointer, destination pointer, stride
 *
 * If last == 129 and ((block[0] >> 4) & 7) != 4, only the DC value
 * (block[0] + 64) >> 7 is added to every byte, and block[0] and block[63]
 * are cleared. Otherwise the full IDCT is run, its 64 results are added and
 * the 64 block words are cleared.
 * Saves and restores %d2-%d7/%a2-%a6; %d0-%d1/%a0-%a1 are overwritten.
 */
    .align  2

mpeg2_idct_add:
    lea.l   (-11*4,%sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)
    movem.l (11*4+4,%sp), %d0/%a0-%a2   | %d0 - last value
                                    | %a0 - block pointer
                                    | %a1 - destination pointer
                                    | %a2 - stride

    cmp.l   #129, %d0               | last == 129 ?
    bne.b   .idct_add               |   no: perform idct + addition
    move.w  (%a0), %d0
    ext.l   %d0                     | ((block[0]
    asr.l   #4, %d0                 |            >> 4)
    and.l   #7, %d0                 |                  & 7)
    subq.l  #4, %d0                 |                       - 4 == 0 ?
    bne.w   .dc_add                 |   no: just perform addition

.idct_add:
    bsr.w   .idct                   | apply idct
    movem.l (11*4+8,%sp), %a0-%a2   | reload arguments %a0..%a2

    move.l  #255, %d2               | preload constant for clipping
    clr.l   %d3                     | used for splitting input words into bytes
    moveq.l #8, %d4                 | loop counter

.add_clip_loop:
    movem.l (%a1), %d6-%d7          | fetch (b0 b1 b2 b3) (b4 b5 b6 b7)
    swap    %d6                     | (b2 b3 b0 b1)
    swap    %d7                     | (b6 b7 b4 b5)

    move.w  (2,%a0), %d0            | load block[1]
    ext.l   %d0                     | sign extend
    move.b  %d6, %d3                | copy b1
    lsr.l   #8, %d6                 | prepare 1st buffer for next byte
    add.l   %d3, %d0                | add b1
    clip_u8 %d2, %d0

    move.w  (%a0), %d1              | load block[0]
    ext.l   %d1                     | sign extend
    move.b  %d6, %d3                | copy b0
    lsr.l   #8, %d6                 | prepare 1st buffer for next byte
    add.l   %d3, %d1                | add b0
    clip_u8 %d2, %d1

    move.b  %d1, %d5                | collect output bytes 0..3 in %d5
    lsl.l   #8, %d5
    move.b  %d0, %d5
    lsl.l   #8, %d5
    clr.l   (%a0)+                  | clear block[0] and block[1]
                                    | %a0 now pointing to block[2]
    move.w  (2,%a0), %d0            | do b3 and b2
    ext.l   %d0
    move.b  %d6, %d3
    lsr.l   #8, %d6
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.w  (%a0), %d1
    ext.l   %d1
    add.l   %d6, %d1                | only b2 is left in %d6
    clip_u8 %d2, %d1

    move.b  %d1, %d5
    lsl.l   #8, %d5
    move.b  %d0, %d5
    clr.l   (%a0)+

    move.w  (2,%a0), %d0            | do b5 and b4
    ext.l   %d0
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.w  (%a0), %d1
    ext.l   %d1
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d1
    clip_u8 %d2, %d1

    move.b  %d1, %d6                | collect output bytes 4..7 in %d6
    lsl.l   #8, %d6
    move.b  %d0, %d6
    lsl.l   #8, %d6
    clr.l   (%a0)+

    move.w  (2,%a0), %d0            | do b7 and b6
    ext.l   %d0
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.w  (%a0), %d1
    ext.l   %d1
    add.l   %d7, %d1                | only b6 is left in %d7
    clip_u8 %d2, %d1

    move.b  %d1, %d6
    lsl.l   #8, %d6
    move.b  %d0, %d6
    clr.l   (%a0)+

    movem.l %d5-%d6, (%a1)          | write all 8 output bytes at once
    add.l   %a2, %a1                | advance output pointer
    subq.l  #1, %d4                 | loop 8 times
    bne.w   .add_clip_loop

    bra.w   .idct_add_end

.dc_add:
    move.w  (%a0), %d0
    ext.l   %d0                     | %d0 = (block[0]
    add.l   #64, %d0                |                 + 64)
    asr.l   #7, %d0                 |                       >> 7
    clr.w   (%a0)                   | clear block[0]
    clr.w   (63*2,%a0)              | and block[63]
    move.l  %d0, %a0                | DC value in %a0

    move.l  #255, %d2               | preload constant for clipping
    clr.l   %d3                     | for splitting input words into bytes
    moveq.l #8, %d4                 | loop counter

.dc_clip_loop:
    movem.l (%a1), %d6-%d7          | (b0 b1 b2 b3) (b4 b5 b6 b7)
    swap    %d6                     | (b2 b3 b0 b1)
    swap    %d7                     | (b6 b7 b4 b5)

    move.l  %a0, %d0                | copy DC
    move.b  %d6, %d3                | copy b1
    lsr.l   #8, %d6                 | prepare 1st buffer for next byte
    add.l   %d3, %d0                | add b1
    clip_u8 %d2, %d0

    move.l  %a0, %d1                | copy DC
    move.b  %d6, %d3                | copy b0
    lsr.l   #8, %d6                 | prepare 1st buffer for next byte
    add.l   %d3, %d1                | add b0
    clip_u8 %d2, %d1

    move.b  %d1, %d5                | collect output bytes 0..3 in %d5
    lsl.l   #8, %d5
    move.b  %d0, %d5
    lsl.l   #8, %d5

    move.l  %a0, %d0                | do b3 and b2
    move.b  %d6, %d3
    lsr.l   #8, %d6
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.l  %a0, %d1
    add.l   %d6, %d1                | only b2 is left in %d6
    clip_u8 %d2, %d1

    move.b  %d1, %d5
    lsl.l   #8, %d5
    move.b  %d0, %d5

    move.l  %a0, %d0                | do b5 and b4
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.l  %a0, %d1
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d1
    clip_u8 %d2, %d1

    move.b  %d1, %d6                | collect output bytes 4..7 in %d6
    lsl.l   #8, %d6
    move.b  %d0, %d6
    lsl.l   #8, %d6

    move.l  %a0, %d0                | do b7 and b6
    move.b  %d7, %d3
    lsr.l   #8, %d7
    add.l   %d3, %d0
    clip_u8 %d2, %d0

    move.l  %a0, %d1
    add.l   %d7, %d1                | only b6 is left in %d7
    clip_u8 %d2, %d1

    move.b  %d1, %d6
    lsl.l   #8, %d6
    move.b  %d0, %d6

    movem.l %d5-%d6, (%a1)          | write all 8 output bytes at once
    add.l   %a2, %a1                | advance output pointer
    subq.l  #1, %d4                 | loop 8 times
    bne.w   .dc_clip_loop

.idct_add_end:
    movem.l (%sp), %d2-%d7/%a2-%a6
    lea.l   (11*4,%sp), %sp
    rts
    .size   mpeg2_idct_add, .-mpeg2_idct_add