/*
 * File:   apps/plugins/mpegplayer/libmpeg2/idct_coldfire.S
 * Origin: rockbox-zig (tsiry-sandratraina.com), branch master,
 *         commit 9d3d9252 - Solomon Peachy,
 *         Revert "RFC: Get rid of mpegplayer plugin"
 *         (rockbox-zig: a modern Music Player Daemon based on the Rockbox
 *         open source high quality audio player)
 */
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2007 Jens Arnold
 * Based on the work of Karim Boucher and Rani Hod
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

 23    .global     mpeg2_idct_copy
 24    .type       mpeg2_idct_copy, @function
 25    .global     mpeg2_idct_add
 26    .type       mpeg2_idct_add, @function
 27
 28    /* The IDCT itself.
 29     * Input: %a0: block pointer
 30     * Caller must save all registers. */
 31    .align  2
 32.idct:
 33    move.l  %a0, %a6
 34
 35    move.l  #0, %macsr              | signed integer mode
 36
 37    move.l  #((2048<<16)+2841), %a0 | W0,  W1
 38    move.l  #((2676<<16)+2408), %a1 | W2,  W3
 39    move.l  #((2048<<16)+1609), %a2 | W4,  W5
 40    move.l  #((1108<<16)+ 565), %a3 | W6,  W7
 41    
 42    lea.l   (128,%a6), %a4      | secondary, transposed temp buffer
 43    moveq.l #8, %d3             | loop counter
 44    
 45.row_loop:
 46    movem.l (%a6), %d0-%d2/%a5  | fetch (f0, f2, f4, f6, f1, f3, f5, f7)
 47    
 48    mac.w   %a0l, %d2u, %acc0   | %acc0 = W1 * f1
 49    mac.w   %a1l, %d2l, %acc0   |       + W3 * f3
 50    mac.w   %a2l, %a5u, %acc0   |       + W5 * f5
 51    mac.w   %a3l, %a5l, %acc0   |       + W7 * f7
 52
 53    mac.w   %a1l, %d2u, %acc1   | %acc1 = W3 * f1
 54    msac.w  %a3l, %d2l, %acc1   |       - W7 * f3
 55    msac.w  %a0l, %a5u, %acc1   |       - W1 * f5
 56    msac.w  %a2l, %a5l, %acc1   |       - W5 * f7
 57    
 58    mac.w   %a2l, %d2u, %acc2   | %acc2 = W5 * f1
 59    msac.w  %a0l, %d2l, %acc2   |       - W1 * f3
 60    mac.w   %a3l, %a5u, %acc2   |       + W7 * f5
 61    mac.w   %a1l, %a5l, %acc2   |       + W3 * f7
 62
 63    mac.w   %a3l, %d2u, %acc3   | %acc3 = W7 * f1
 64    msac.w  %a2l, %d2l, %acc3   |       - W5 * f3
 65    mac.w   %a1l, %a5u, %acc3   |       + W3 * f5
 66    msac.w  %a0l, %a5l, %acc3   |       - W1 * f7
 67
 68    lea.l   (16,%a6), %a6       | Advance to next row; put here to fill EMAC latency
 69    add.l   #(1<<16), %d0       | f0 += 1; 
 70
 71    movclr.l %acc0, %d4         | b0
 72    movclr.l %acc1, %d5         | b1
 73    movclr.l %acc2, %d6         | b2
 74    movclr.l %acc3, %d7         | b3
 75
 76    mac.w   %a0u, %d0u, %acc0   | %acc0 = W0 * f0
 77    mac.w   %a2u, %d1u, %acc0   |       + W4 * f4
 78    move.l  %acc0, %acc3
 79    mac.w   %a1u, %d0l, %acc0   |       + W2 * f2
 80    mac.w   %a3u, %d1l, %acc0   |       + W6 * f6
 81
 82    mac.w   %a0u, %d0u, %acc1   | %acc1 = W0 * f0
 83    msac.w  %a2u, %d1u, %acc1   |       - W4 * f4
 84    move.l  %acc1, %acc2
 85    mac.w   %a3u, %d0l, %acc1   |       + W6 * f2
 86    msac.w  %a1u, %d1l, %acc1   |       - W2 * f6
 87
 88    | ^ move.l  %acc1, %acc2      %acc2 = W0 * f0 - W4 * f4
 89    msac.w  %a3u, %d0l, %acc2   |       - W6 * f2
 90    mac.w   %a1u, %d1l, %acc2   |       + W2 * f6
 91
 92    | ^ move.l  %acc0, %acc3      %acc3 = W0 * f0 + W4 * f4
 93    msac.w  %a1u, %d0l, %acc3   |       - W2 * f2
 94    msac.w  %a3u, %d1l, %acc3   |       - W6 * f6
 95
 96    moveq.l #12, %d1            | shift amount
 97
 98    move.l  %acc0, %d0          | block[7] = (a0
 99    sub.l   %d4,%d0             |     - b0)
100    asr.l   %d1, %d0            |     >> 12
101    move.w  %d0, (7*16,%a4)
102
103    move.l  %acc1, %d0          | block[6] = (a1
104    sub.l   %d5,%d0             |     - b1)
105    asr.l   %d1, %d0            |     >> 12
106    move.w  %d0, (6*16,%a4)
107   
108    move.l  %acc2, %d0          | block[5] = (a2
109    sub.l   %d6,%d0             |     - b2)
110    asr.l   %d1, %d0            |     >> 12
111    move.w  %d0, (5*16,%a4)
112   
113    move.l  %acc3, %d0          | block[4] = (a3
114    sub.l   %d7,%d0             |     - b3)
115    asr.l   %d1, %d0            |     >> 12
116    move.w  %d0, (4*16,%a4)
117
118    movclr.l %acc3, %d0         | block[3] = (a3
119    add.l   %d7, %d0            |     + b3)
120    asr.l   %d1, %d0            |     >> 12
121    move.w  %d0, (3*16,%a4)
122
123    movclr.l %acc2, %d0         | block[2] = (a2
124    add.l   %d6, %d0            |     + b2)
125    asr.l   %d1, %d0            |     >> 12
126    move.w  %d0, (2*16,%a4)
127
128    movclr.l %acc1, %d0         | block[1] = (a1
129    add.l   %d5, %d0            |     + b1)
130    asr.l   %d1, %d0            |     >> 12
131    move.w  %d0, (1*16,%a4)
132
133    movclr.l %acc0, %d0         | block[0] = (a0
134    add.l   %d4, %d0            |     + b0)
135    asr.l   %d1, %d0            |     >> 12
136    move.w  %d0, (%a4)+         | advance to next temp column
137    
138    subq.l  #1, %d3             | loop 8 times
139    bne.w   .row_loop
140    
141    | %a6 now points to the temp buffer, where we need it.
142    lea.l   (-16-128,%a4), %a4  | point %a4 back to the input block
143    moveq.l #8, %d3             | loop counter
144    
145.col_loop:
146    movem.l (%a6), %d0-%d2/%a5  | fetch (f0, f2, f4, f6, f1, f3, f5, f7)
147  
148    mac.w   %a0l, %d2u, %acc0   | %acc0 = W1 * f1
149    mac.w   %a1l, %d2l, %acc0   |       + W3 * f3
150    mac.w   %a2l, %a5u, %acc0   |       + W5 * f5
151    mac.w   %a3l, %a5l, %acc0   |       + W7 * f7
152
153    mac.w   %a1l, %d2u, %acc1   | %acc1 = W3 * f1
154    msac.w  %a3l, %d2l, %acc1   |       - W7 * f3
155    msac.w  %a0l, %a5u, %acc1   |       - W1 * f5
156    msac.w  %a2l, %a5l, %acc1   |       - W5 * f7
157    
158    mac.w   %a2l, %d2u, %acc2   | %acc2 = W5 * f1
159    msac.w  %a0l, %d2l, %acc2   |       - W1 * f3
160    mac.w   %a3l, %a5u, %acc2   |       + W7 * f5
161    mac.w   %a1l, %a5l, %acc2   |       + W3 * f7
162
163    mac.w   %a3l, %d2u, %acc3   | %acc3 = W7 * f1
164    msac.w  %a2l, %d2l, %acc3   |       - W5 * f3
165    mac.w   %a1l, %a5u, %acc3   |       + W3 * f5
166    msac.w  %a0l, %a5l, %acc3   |       - W1 * f7
167    
168    lea.l   (16,%a6), %a6       | Advance to next row; put here to fill EMAC latency
169    add.l   #(32<<16), %d0      | DC offset: 0.5
170
171    movclr.l %acc0, %d4         | b0
172    movclr.l %acc1, %d5         | b1
173    movclr.l %acc2, %d6         | b2
174    movclr.l %acc3, %d7         | b3
175
176    mac.w   %a0u, %d0u, %acc0   | %acc0 = W0 * f0
177    mac.w   %a2u, %d1u, %acc0   |       + W4 * f4
178    move.l  %acc0, %acc3
179    mac.w   %a1u, %d0l, %acc0   |       + W2 * f2
180    mac.w   %a3u, %d1l, %acc0   |       + W6 * f6
181
182    mac.w   %a0u, %d0u, %acc1   | %acc1 = W0 * f0
183    msac.w  %a2u, %d1u, %acc1   |       - W4 * f4
184    move.l  %acc1, %acc2
185    mac.w   %a3u, %d0l, %acc1   |       + W6 * f2
186    msac.w  %a1u, %d1l, %acc1   |       - W2 * f6
187
188    | ^ move.l  %acc1, %acc2      %acc2 = W0 * f0 - W4 * f4
189    msac.w  %a3u, %d0l, %acc2   |       - W6 * f2
190    mac.w   %a1u, %d1l, %acc2   |       + W2 * f6
191
192    | ^ move.l  %acc0, %acc3      %acc3 = W0 * f0 + W4 * f4
193    msac.w  %a1u, %d0l, %acc3   |       - W2 * f2
194    msac.w  %a3u, %d1l, %acc3   |       - W6 * f6
195
196    moveq.l #17, %d1            | shift amount
197
198    move.l  %acc0, %d0          | block[7] = (a0
199    sub.l   %d4,%d0             |     - b0)
200    asr.l   %d1, %d0            |     >> 17
201    move.w  %d0, (7*16,%a4)
202   
203    move.l  %acc1, %d0          | block[6] = (a1
204    sub.l   %d5,%d0             |     - b1)
205    asr.l   %d1, %d0            |     >> 17
206    move.w  %d0, (6*16,%a4)
207   
208    move.l  %acc2, %d0          | block[5] = (a2
209    sub.l   %d6,%d0             |     - b2)
210    asr.l   %d1, %d0            |     >> 17
211    move.w  %d0, (5*16,%a4)
212   
213    move.l  %acc3, %d0          | block[4] = (a3
214    sub.l   %d7,%d0             |     - b3)
215    asr.l   %d1, %d0            |     >> 17
216    move.w  %d0, (4*16,%a4)
217   
218    movclr.l %acc3, %d0         | block[3] = (a3
219    add.l   %d7, %d0            |     + b3)
220    asr.l   %d1, %d0            |     >> 17
221    move.w  %d0, (3*16,%a4)
222   
223    movclr.l %acc2, %d0         | block[2] = (a2
224    add.l   %d6, %d0            |     + b2)
225    asr.l   %d1, %d0            |     >> 17
226    move.w  %d0, (2*16,%a4)
227   
228    movclr.l %acc1, %d0         | block[1] = (a1
229    add.l   %d5, %d0            |     + b1)
230    asr.l   %d1, %d0            |     >> 17
231    move.w  %d0, (1*16,%a4)
232   
233    movclr.l %acc0, %d0         | block[0] = (a0
234    add.l   %d4, %d0            |     + b0)
235    asr.l   %d1, %d0            |     >> 17
236    move.w  %d0, (%a4)+         | advance to next column
237
238    subq.l  #1, %d3             | loop 8 times
239    bne.w   .col_loop
240    
241    rts
242    
243    .align  2
244
245mpeg2_idct_copy:
246    lea.l   (-11*4,%sp), %sp
247    movem.l %d2-%d7/%a2-%a6, (%sp)  | save some registers
248    move.l  (11*4+4,%sp), %a0       | %a0 - block pointer for idct
249
250    bsr.w   .idct                   | apply idct to block
251    movem.l (11*4+4,%sp), %a0-%a2   | %a0 - block pointer
252                                    | %a1 - destination pointer
253                                    | %a2 - stride
254
255    move.l  #255, %d1           | preload constant for clipping
256    moveq.l #8, %d4             | loop counter
257    
258.copy_clip_loop:
259    move.w  (%a0), %d0          | load block[0]
260    ext.l   %d0                 | sign extend
261    cmp.l   %d1, %d0            | overflow?
262    bls.b   1f
263    spl.b   %d0                 |   yes: set appropriate limit value in low byte
2641:
265    move.b  %d0, %d2            | collect output bytes 0..3 in %d2
266    lsl.l   #8, %d2
267
268    move.w  (2,%a0), %d0        | load block[1]
269    ext.l   %d0                 | sign extend
270    cmp.l   %d1, %d0            | overflow?
271    bls.b   1f
272    spl.b   %d0                 | yes: set appropriate limit value in low byte
2731:
274    move.b  %d0, %d2            | collect output bytes 0..3 in %d2
275    lsl.l   #8, %d2
276    clr.l   (%a0)+              | clear block[0] and block[1],
277                                | %a0 now pointing to block[2]
278    move.w  (%a0), %d0          | do b2 and b3
279    ext.l   %d0
280    cmp.l   %d1, %d0
281    bls.b   1f
282    spl.b   %d0
2831:
284    move.b  %d0, %d2
285    lsl.l   #8, %d2
286
287    move.w  (2,%a0), %d0
288    ext.l   %d0
289    cmp.l   %d1, %d0
290    bls.b   1f
291    spl.b   %d0
2921:
293    move.b  %d0, %d2
294    clr.l   (%a0)+
295
296    move.w  (%a0), %d0          | do b4 and b5
297    ext.l   %d0
298    cmp.l   %d1, %d0
299    bls.b   1f
300    spl.b   %d0
3011:
302    move.b  %d0, %d3
303    lsl.l   #8, %d3
304
305    move.w  (2,%a0), %d0
306    ext.l   %d0
307    cmp.l   %d1, %d0
308    bls.b   1f
309    spl.b   %d0
3101:
311    move.b  %d0, %d3
312    lsl.l   #8, %d3
313    clr.l   (%a0)+
314
315    move.w  (%a0), %d0          | do b6 and b7
316    ext.l   %d0
317    cmp.l   %d1, %d0
318    bls.b   1f
319    spl.b   %d0
3201:
321    move.b  %d0, %d3
322    lsl.l   #8, %d3
323
324    move.w  (2,%a0), %d0
325    ext.l   %d0
326    cmp.l   %d1, %d0
327    bls.b   1f
328    spl.b   %d0
3291:
330    move.b  %d0, %d3            
331    clr.l   (%a0)+
332    
333    movem.l %d2-%d3, (%a1)      | write all 8 output bytes at once
334    add.l   %a2, %a1            | advance output pointer
335    subq.l  #1, %d4             | loop 8 times
336    bne.w   .copy_clip_loop
337
338    movem.l (%sp), %d2-%d7/%a2-%a6
339    lea.l   (11*4,%sp), %sp
340    rts
341
342    .align  2
343
344mpeg2_idct_add:
345    lea.l   (-11*4,%sp), %sp
346    movem.l %d2-%d7/%a2-%a6, (%sp)
347    movem.l (11*4+4,%sp), %d0/%a0-%a2   | %d0 - last value
348                                        | %a0 - block pointer
349                                        | %a1 - destination pointer
350                                        | %a2 - stride
351
352    cmp.l   #129, %d0           | last == 129 ?
353    bne.b   .idct_add           |   no: perform idct + addition
354    move.w  (%a0), %d0
355    ext.l   %d0                 | ((block[0]
356    asr.l   #4, %d0             |      >> 4)
357    and.l   #7, %d0             |      & 7)
358    subq.l  #4, %d0             |      - 4 == 0 ?
359    bne.w   .dc_add             |   no: just perform addition
360
361.idct_add:
362    bsr.w   .idct                   | apply idct
363    movem.l (11*4+8,%sp), %a0-%a2   | reload arguments %a0..%a2
364
365    move.l  #255, %d2           | preload constant for clipping
366    clr.l   %d3                 | used for splitting input words into bytes
367    moveq.l #8, %d4             | loop counter
368    
369.add_clip_loop:
370    movem.l (%a1), %d6-%d7      | fetch (b0 b1 b2 b3) (b4 b5 b6 b7)
371    swap    %d6                 | (b2 b3 b0 b1) 
372    swap    %d7                 | (b6 b7 b4 b5)
373    
374    move.w  (2,%a0), %d0        | load block[1]
375    ext.l   %d0                 | sign extend
376    move.b  %d6, %d3            | copy b1
377    lsr.l   #8, %d6             | prepare 1st buffer for next byte
378    add.l   %d3, %d0            | add b1
379    cmp.l   %d2, %d0            | overflow ?
380    bls.b   1f
381    spl.b   %d0                 |   yes: set appropriate limit value in low byte
3821:
383    move.w  (%a0), %d1          | load block[0]
384    ext.l   %d1                 | sign extend
385    move.b  %d6, %d3            | copy b0
386    lsr.l   #8, %d6             | prepare 1st buffer for next byte
387    add.l   %d3, %d1            | add b0
388    cmp.l   %d2, %d1            | overflow ?
389    bls.b   1f
390    spl.b   %d1                 |   yes: set appropriate limit value in low byte
3911:
392    move.b  %d1, %d5            | collect output bytes 0..3 in %d5
393    lsl.l   #8, %d5
394    move.b  %d0, %d5
395    lsl.l   #8, %d5
396    clr.l   (%a0)+              | clear block[0] and block[1]
397                                |   %a0 now pointing to block[2]
398    move.w  (2,%a0), %d0        | do b3 and b2
399    ext.l   %d0
400    move.b  %d6, %d3
401    lsr.l   #8, %d6
402    add.l   %d3, %d0
403    cmp.l   %d2, %d0
404    bls.b   1f
405    spl.b   %d0
4061:
407    move.w  (%a0), %d1
408    ext.l   %d1
409    add.l   %d6, %d1
410    cmp.l   %d2, %d1
411    bls.b   1f
412    spl.b   %d1
4131:
414    move.b  %d1, %d5
415    lsl.l   #8, %d5
416    move.b  %d0, %d5
417    clr.l   (%a0)+
418
419    move.w  (2,%a0), %d0        | do b5 and b4
420    ext.l   %d0
421    move.b  %d7, %d3
422    lsr.l   #8, %d7
423    add.l   %d3, %d0
424    cmp.l   %d2, %d0
425    bls.b   1f
426    spl.b   %d0
4271:
428    move.w  (%a0), %d1
429    ext.l   %d1
430    move.b  %d7, %d3
431    lsr.l   #8, %d7
432    add.l   %d3, %d1
433    cmp.l   %d2, %d1
434    bls.b   1f
435    spl.b   %d1
4361:
437    move.b  %d1, %d6
438    lsl.l   #8, %d6
439    move.b  %d0, %d6
440    lsl.l   #8, %d6
441    clr.l   (%a0)+
442
443    move.w  (2,%a0), %d0        | do b7 and b6
444    ext.l   %d0
445    move.b  %d7, %d3
446    lsr.l   #8, %d7
447    add.l   %d3, %d0
448    cmp.l   %d2, %d0
449    bls.b   1f
450    spl.b   %d0
4511:
452    move.w  (%a0), %d1
453    ext.l   %d1
454    add.l   %d7, %d1
455    cmp.l   %d2, %d1
456    bls.b   1f
457    spl.b   %d1
4581:
459    move.b  %d1, %d6
460    lsl.l   #8, %d6
461    move.b  %d0, %d6
462    clr.l   (%a0)+
463
464    movem.l %d5-%d6, (%a1)      | write all 8 output bytes at once
465    add.l   %a2, %a1            | advance output pointer
466    subq.l  #1, %d4             | loop 8 times
467    bne.w   .add_clip_loop
468
469    bra.w   .idct_add_end
470    
471.dc_add:
472    move.w  (%a0), %d0
473    ext.l   %d0                 | %d0 = (block[0]
474    add.l   #64, %d0            |       + 64)
475    asr.l   #7, %d0             |       >> 7
476    clr.w   (%a0)               | clear block[0]
477    clr.w   (63*2,%a0)          |   and block[63]
478    move.l  %d0, %a0            | DC value in %a0
479    
480    move.l  #255, %d2           | preload constant for clipping
481    clr.l   %d3                 | for splitting input words into bytes
482    moveq.l #8, %d4             | loop counter
483    
484.dc_clip_loop:
485    movem.l (%a1), %d6-%d7      | (b0 b1 b2 b3) (b4 b5 b6 b7)
486    swap    %d6                 | (b2 b3 b0 b1)
487    swap    %d7                 | (b6 b7 b4 b5)
488    
489    move.l  %a0, %d0            | copy DC
490    move.b  %d6, %d3            | copy b1
491    lsr.l   #8, %d6             | prepare 1st buffer for next byte
492    add.l   %d3, %d0            | add b1
493    cmp.l   %d2, %d0            | overflow ?
494    bls.b   1f
495    spl.b   %d0                 |   yes: set appropriate limit value in low byte
4961:
497    move.l  %a0, %d1            | copy DC
498    move.b  %d6, %d3            | copy b0
499    lsr.l   #8, %d6             | prepare 1st buffer for next byte
500    add.l   %d3, %d1            | add b0
501    cmp.l   %d2, %d1            | overflow ?
502    bls.b   1f
503    spl.b   %d1                 |   yes: set appropriate limit value in low byte
5041:
505    move.b  %d1, %d5            | collect output bytes 0..3 in %d5
506    lsl.l   #8, %d5
507    move.b  %d0, %d5
508    lsl.l   #8, %d5
509
510    move.l  %a0, %d0            | do b3 and b2
511    move.b  %d6, %d3
512    lsr.l   #8, %d6
513    add.l   %d3, %d0
514    cmp.l   %d2, %d0
515    bls.b   1f
516    spl.b   %d0
5171:
518    move.l  %a0, %d1
519    add.l   %d6, %d1
520    cmp.l   %d2, %d1
521    bls.b   1f
522    spl.b   %d1
5231:
524    move.b  %d1, %d5
525    lsl.l   #8, %d5
526    move.b  %d0, %d5
527
528    move.l  %a0, %d0            | do b5 and b4
529    move.b  %d7, %d3
530    lsr.l   #8, %d7
531    add.l   %d3, %d0
532    cmp.l   %d2, %d0
533    bls.b   1f
534    spl.b   %d0
5351:
536    move.l  %a0, %d1
537    move.b  %d7, %d3
538    lsr.l   #8, %d7
539    add.l   %d3, %d1
540    cmp.l   %d2, %d1
541    bls.b   1f
542    spl.b   %d1
5431:
544    move.b  %d1, %d6            | do b7 and b6
545    lsl.l   #8, %d6
546    move.b  %d0, %d6
547    lsl.l   #8, %d6
548
549    move.l  %a0, %d0
550    move.b  %d7, %d3
551    lsr.l   #8, %d7
552    add.l   %d3, %d0
553    cmp.l   %d2, %d0
554    bls.b   1f
555    spl.b   %d0
5561:
557    move.l  %a0, %d1
558    add.l   %d7, %d1
559    cmp.l   %d2, %d1
560    bls.b   1f
561    spl.b   %d1
5621:
563    move.b  %d1, %d6
564    lsl.l   #8, %d6
565    move.b  %d0, %d6
566
567    movem.l %d5-%d6, (%a1)      | write all 8 output bytes at once
568    add.l   %a2, %a1            | advance output pointer
569    subq.l  #1, %d4             | loop 8 times
570    bne.w   .dc_clip_loop
571
572.idct_add_end:
573    movem.l (%sp), %d2-%d7/%a2-%a6
574    lea.l   (11*4,%sp), %sp
575    rts