/* Optimized version of the standard memcpy() function.
   This file is part of the GNU C Library.
   Copyright (C) 2000-2016 Free Software Foundation, Inc.
   Contributed by Dan Pop for Itanium <Dan.Pop@cern.ch>.
   Rewritten for McKinley by Sverre Jarp, HP Labs/CERN <Sverre.Jarp@cern.ch>

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* Return: dest

   Inputs:
        in0:    dest
        in1:    src
        in2:    byte count

   An assembly implementation of the algorithm used by the generic C
   version from glibc.  The case when source and dest are aligned is
   treated separately, for extra performance.

   In this form, memcpy assumes little endian mode.  For big endian mode,
   sh1 must be computed using an extra instruction: sub sh1 = 64, sh1
   and the order of r[MEMLAT] and r[MEMLAT+1] must be reversed in the
   shrp instruction.  */
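/* How the unaligned inner loops work (a sketch, derived from the code
   below): shrp dst = hi, lo, n concatenates hi:lo into a 128-bit value
   and extracts the 64 bits starting at bit n.  With n = 8 * (src % 8),
   lo = the earlier aligned 8-byte word and hi = the following one, the
   result is exactly the misaligned 8-byte window straddling the two
   (little endian).  Seven loop variants, one per possible shift, are
   generated by LOOP() and selected through the jump table at the end
   of this file.  */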

#define USE_LFETCH
#define USE_FLP
#include <sysdep.h>
#undef ret

#define LFETCH_DIST     500	// how far (in bytes) lfetch runs ahead

#define ALIGN_UNROLL_no   4 // no. of elements
#define ALIGN_UNROLL_sh	  2 // (shift amount)

#define MEMLAT	8	// pipeline stages between a load and its store
#define Nrot	((4*(MEMLAT+2) + 7) & ~7)
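/* With MEMLAT = 8: 4*(MEMLAT+2) = 40, and (40 + 7) & ~7 = 40, so 40
   rotating registers are reserved; the "& ~7" rounds up because alloc
   requires the size of the rotating region to be a multiple of 8.  */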

#define OP_T_THRES 	16	// at or below this length, copy byte by byte
#define OPSIZ 		8	// bytes per full word

#define loopcnt		r14
#define elemcnt		r15
#define saved_pr	r16
#define saved_lc	r17
#define adest		r18
#define dest		r19
#define asrc		r20
#define src		r21
#define len		r22
#define tmp2		r23
#define tmp3		r24
#define	tmp4		r25
#define ptable		r26
#define ploop56		r27
#define	loopaddr	r28
#define	sh1		r29
#define ptr1		r30
#define ptr2		r31

#define movi0 		mov

#define p_scr		p6
#define p_xtr		p7
#define p_nxtr		p8
#define p_few		p9

#if defined(USE_FLP)
#define load		ldf8
#define store		stf8
#define tempreg		f6
#define the_r		fr
#define the_s		fs
#define the_t		ft
#define the_q		fq
#define the_w		fw
#define the_x		fx
#define the_y		fy
#define the_z		fz
#elif defined(USE_INT)
#define load		ld8
#define store		st8
#define tempreg		tmp2
#define the_r		r
#define the_s		s
#define the_t		t
#define the_q		q
#define the_w		w
#define the_x		x
#define the_y		y
#define the_z		z
#endif

#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO
/* Manually force proper loop-alignment.  Note: be sure to
   double-check the code-layout after making any changes to
   this routine! */
# define ALIGN(n)	{ nop 0 }
#else
# define ALIGN(n)	.align n
#endif
#if defined(USE_LFETCH)
#define LOOP(shift)						\
		ALIGN(32);					\
.loop##shift##:							\
{ .mmb								\
(p[0])	ld8.nt1	r[0] = [asrc], 8 ;				\
(p[0])	lfetch.nt1 [ptr1], 16 ;					\
	nop.b 0 ;						\
} { .mib							\
(p[MEMLAT+1]) st8 [dest] = tmp3, 8 ;				\
(p[MEMLAT]) shrp tmp3 = r[MEMLAT], s[MEMLAT+1], shift ;		\
	nop.b 0 ;;						\
 } { .mmb							\
(p[0])	ld8.nt1	s[0] = [asrc], 8 ;				\
(p[0])	lfetch.nt1	[ptr2], 16 ;				\
	nop.b 0 ;						\
} { .mib							\
(p[MEMLAT+1]) st8 [dest] = tmp4, 8 ;				\
(p[MEMLAT]) shrp tmp4 = s[MEMLAT], r[MEMLAT], shift ;		\
	br.ctop.sptk.many .loop##shift 				\
;; }								\
{ .mib								\
	br.cond.sptk.many .copy_bytes ; /* deal with the remaining bytes */  \
}
#else
#define LOOP(shift)						\
		ALIGN(32);					\
.loop##shift##:							\
{ .mmb								\
(p[0])	ld8.nt1	r[0] = [asrc], 8 ;				\
	nop.b 0 ;						\
} { .mib							\
(p[MEMLAT+1]) st8 [dest] = tmp3, 8 ;				\
(p[MEMLAT]) shrp tmp3 = r[MEMLAT], s[MEMLAT+1], shift ;		\
	nop.b 0 ;;						\
 } { .mmb							\
(p[0])	ld8.nt1	s[0] = [asrc], 8 ;				\
	nop.b 0 ;						\
} { .mib							\
(p[MEMLAT+1]) st8 [dest] = tmp4, 8 ;				\
(p[MEMLAT]) shrp tmp4 = s[MEMLAT], r[MEMLAT], shift ;		\
	br.ctop.sptk.many .loop##shift 				\
;; }								\
{ .mib								\
	br.cond.sptk.many .copy_bytes ; /* deal with the remaining bytes */  \
}
#endif
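/* A note on the software pipeline above: each rotation issues two
   ld8.nt1 from the aligned asrc, and MEMLAT rotations later the shrp
   results are stored, so loads run far enough ahead of stores to hide
   memory latency.  br.ctop rotates the registers and predicates;
   ar.ec (set to MEMLAT + 2 before entry) drains the in-flight loads
   once the loop counter is exhausted, and the trailing br.cond hands
   the sub-16-byte tail to .copy_bytes.  */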


ENTRY(memcpy)
{ .mmi
	.prologue
	alloc 	r2 = ar.pfs, 3, Nrot - 3, 0, Nrot
	.rotr	r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1]
	.rotp	p[MEMLAT+2]
	.rotf	fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1]
	mov	ret0 = in0		// return value = dest
	.save   pr, saved_pr
	movi0	saved_pr = pr		// save the predicate registers
} { .mmi
	and	tmp4 = 7, in0 		// check if destination is aligned
	mov 	dest = in0		// dest
	mov 	src = in1		// src
;; }
{ .mii
	cmp.eq	p_scr, p0 = in2, r0	// if (len == 0)
	.save   ar.lc, saved_lc
        movi0 	saved_lc = ar.lc	// save the loop counter
	.body
	cmp.ge	p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRES?
} { .mbb
	mov	len = in2		// len
(p_scr)	br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest
(p_few) br.cond.dpnt.many .copy_bytes	// Branch no. 2: copy byte by byte
;; }
{ .mmi
#if defined(USE_LFETCH)
	lfetch.nt1 [dest]		// prefetch the first dest line
	lfetch.nt1 [src]		// prefetch the first src line
#endif
	shr.u	elemcnt = len, 3	// elemcnt = len / 8
} { .mib
	cmp.eq	p_scr, p0 = tmp4, r0	// is destination aligned?
	sub	loopcnt = 7, tmp4	// loopcnt = 7 - dest % 8
(p_scr) br.cond.dptk.many .dest_aligned
;; }
{ .mmi
	ld1	tmp2 = [src], 1		// load the first byte
	sub	len = len, loopcnt, 1	// len -= loopcnt + 1
	movi0	ar.lc = loopcnt		// set the loop counter
} { .mib
	cmp.ne  p_scr, p0 = 0, loopcnt	// avoid loading beyond end-point
;; }

.l0:	// ---------------------------- // L0: Align dest on 8-byte boundary
{ .mmi
	st1	[dest] = tmp2, 1	// store the current byte
(p_scr)	ld1	tmp2 = [src], 1		// load the next byte
} { .mib
	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point
	add	loopcnt = -1, loopcnt
	br.cloop.dptk.few .l0
;; }
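/* The byte loop above is itself software-pipelined one stage deep:
   each iteration stores the byte loaded on the previous pass while
   (p_scr) guards the next load, so src is never read past the bytes
   still owed.  It copies 8 - dest % 8 bytes, after which dest is
   8-byte aligned.  */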

.dest_aligned:
{ .mmi
	and	tmp4 = 7, src		// ready for alignment check
	shr.u	elemcnt = len, 3	// elemcnt = len / 8
;; }
{ .mib
	cmp.ne	p_scr, p0 = tmp4, r0	// is source also aligned?
	tbit.nz p_xtr, p_nxtr = src, 3	// prepare a separate move if src
} { .mib				// is not 16B aligned
	add	ptr2 = LFETCH_DIST, dest	// prefetch address
	add	ptr1 = LFETCH_DIST, src
(p_scr) br.cond.dptk.many .src_not_aligned
;; }

// The optimal case, when dest and src are both aligned

.both_aligned:
{ .mmi
	.pred.rel "mutex",p_xtr,p_nxtr
(p_xtr)	cmp.gt  p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify
(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt  // Need only N to qualify
	movi0	pr.rot = 1 << 16	// set rotating predicates
} { .mib
(p_scr) br.cond.dpnt.many .copy_full_words
;; }

{ .mmi
(p_xtr)	load	tempreg = [src], 8
(p_xtr) add 	elemcnt = -1, elemcnt
	movi0	ar.ec = MEMLAT + 1	// set the epilog counter
;; }
{ .mmi
(p_xtr) add	len = -8, len		// account for the extra word
	add 	asrc = 16, src 		// one bank apart (for USE_INT)
	shr.u	loopcnt = elemcnt, ALIGN_UNROLL_sh  // cater for unrolling
;; }
{ .mmi
	add	loopcnt = -1, loopcnt
(p_xtr)	store	[dest] = tempreg, 8	// copy the "extra" word
	nop.i	0
;; }
{ .mib
	add	adest = 16, dest
	movi0	ar.lc = loopcnt 	// set the loop counter
;; }
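/* Why the "extra" word: the USE_FLP loop below fetches with ldfp8,
   which loads a 16-byte pair and requires a 16-byte aligned address.
   If src is 8-byte but not 16-byte aligned (p_xtr), one word is
   copied up front so the loop sees aligned pairs; elemcnt and len are
   adjusted to match.  The loop is unrolled by ALIGN_UNROLL_no = 4
   words, hence loopcnt = elemcnt / 4 - 1 and 32 bytes move per
   iteration.  */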

#ifdef  GAS_ALIGN_BREAKS_UNWIND_INFO
	{ nop 0 }
#else
	.align	32
#endif
#if defined(USE_FLP)
.l1: // ------------------------------- // L1: Everything a multiple of 8
{ .mmi
#if defined(USE_LFETCH)
(p[0])	lfetch.nt1 [ptr2],32
#endif
(p[0])	ldfp8	the_r[0],the_q[0] = [src], 16
(p[0])	add	len = -32, len
} {.mmb
(p[MEMLAT]) store [dest] = the_r[MEMLAT], 8
(p[MEMLAT]) store [adest] = the_s[MEMLAT], 8
;; }
{ .mmi
#if defined(USE_LFETCH)
(p[0])	lfetch.nt1 [ptr1],32
#endif
(p[0])	ldfp8	the_s[0], the_t[0] = [src], 16
} {.mmb
(p[MEMLAT]) store [dest] = the_q[MEMLAT], 24
(p[MEMLAT]) store [adest] = the_t[MEMLAT], 24
	br.ctop.dptk.many .l1
;; }
#elif defined(USE_INT)
.l1: // ------------------------------- // L1: Everything a multiple of 8
{ .mmi
(p[0])	load	the_r[0] = [src], 8
(p[0])	load	the_q[0] = [asrc], 8
(p[0])	add	len = -32, len
} {.mmb
(p[MEMLAT]) store [dest] = the_r[MEMLAT], 8
(p[MEMLAT]) store [adest] = the_q[MEMLAT], 8
;; }
{ .mmi
(p[0])	load	the_s[0]  = [src], 24
(p[0])	load	the_t[0] = [asrc], 24
} {.mmb
(p[MEMLAT]) store [dest] = the_s[MEMLAT], 24
(p[MEMLAT]) store [adest] = the_t[MEMLAT], 24
#if defined(USE_LFETCH)
;; }
{ .mmb
(p[0])	lfetch.nt1 [ptr2],32
(p[0])	lfetch.nt1 [ptr1],32
#endif
	br.ctop.dptk.many .l1
;; }
#endif
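/* On exit from .l1, len holds what the unrolled loop did not cover:
   fewer than 32 bytes, i.e. at most three full words (finished by
   .copy_full_words below) plus a sub-word tail (.copy_bytes).  */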

.copy_full_words:
{ .mib
	cmp.gt	p_scr, p0 = 8, len	// less than one full word left?
	shr.u	elemcnt = len, 3	// elemcnt = len / 8
(p_scr) br.cond.dpnt.many .copy_bytes
;; }
{ .mii
	load	tempreg = [src], 8
	add	loopcnt = -1, elemcnt	// loopcnt = elemcnt - 1
;; }
{ .mii
	cmp.ne	p_scr, p0 = 0, loopcnt	// avoid load beyond end-point
	mov	ar.lc = loopcnt		// set the loop counter
;; }

.l2: // ------------------------------- // L2: Max 4 words copied separately
{ .mmi
	store	[dest] = tempreg, 8
(p_scr)	load	tempreg = [src], 8	// load the next word
	add	len = -8, len
} { .mib
	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point
	add	loopcnt = -1, loopcnt
	br.cloop.dptk.few  .l2
;; }

.copy_bytes:
{ .mib
	cmp.eq	p_scr, p0 = len, r0	// is len == 0 ?
	add	loopcnt = -1, len	// len--;
(p_scr)	br.cond.spnt	.restore_and_exit
;; }
{ .mii
	ld1	tmp2 = [src], 1
	movi0	ar.lc = loopcnt
	cmp.ne	p_scr, p0 = 0, loopcnt	// avoid load beyond end-point
;; }

.l3: // ------------------------------- // L3: Final byte move
{ .mmi
	st1	[dest] = tmp2, 1
(p_scr)	ld1	tmp2 = [src], 1
} { .mib
	cmp.lt	p_scr, p0 = 1, loopcnt	// avoid load beyond end-point
	add	loopcnt = -1, loopcnt
	br.cloop.dptk.few  .l3
;; }

.restore_and_exit:
{ .mmi
	movi0	pr = saved_pr, -1	// restore the predicate registers
;; }
{ .mib
	movi0	ar.lc = saved_lc	// restore the loop counter
	br.ret.sptk.many b0
;; }


.src_not_aligned:
{ .mmi
	cmp.gt	p_scr, p0 = 16, len	// too few bytes for the 16B loop?
	and	sh1 = 7, src 		// sh1 = src % 8
	shr.u	loopcnt = len, 4	// element-cnt = len / 16
} { .mib
	add	tmp4 = @ltoff(.table), gp
	add 	tmp3 = @ltoff(.loop56), gp
(p_scr)	br.cond.dpnt.many .copy_bytes	// do byte by byte if too few
;; }
{ .mmi
	and	asrc = -8, src		// asrc = src & -8 -- align src for loop
	add 	loopcnt = -1, loopcnt	// loopcnt--
	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8)
} { .mmi
	ld8	ptable = [tmp4]		// ptable = &table
	ld8	ploop56 = [tmp3]	// ploop56 = &loop56
	and	tmp2 = -16, len		// tmp2 = len & (-16)
;; }
{ .mmi
	add	tmp3 = ptable, sh1	// tmp3 = &table + sh1
	add	src = src, tmp2		// src += len & (-16)
	movi0	ar.lc = loopcnt		// set LC
;; }
{ .mmi
	ld8	tmp4 = [tmp3]		// tmp4 = loop offset
	sub	len = len, tmp2		// len -= len & (-16)
	movi0	ar.ec = MEMLAT + 2 	// one more pass needed
;; }
{ .mmi
	ld8	s[1] = [asrc], 8	// preload
	sub	loopaddr = ploop56,tmp4	// loopaddr = &loop56 - loop offset
	movi0   pr.rot = 1 << 16	// set rotating predicates
;; }
{ .mib
	nop.m	0
	movi0	b6 = loopaddr
	br	b6			// jump to the appropriate loop
;; }
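/* Dispatch example, traced from the code above: for src % 8 == 3,
   sh1 = 24, so tmp4 = table[3] = .loop56 - .loop24 and
   loopaddr = &.loop56 - tmp4 = &.loop24, i.e. the variant whose shrp
   shifts by 24 bits (3 bytes).  */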

	LOOP(8)
	LOOP(16)
	LOOP(24)
	LOOP(32)
	LOOP(40)
	LOOP(48)
	LOOP(56)
END(memcpy)
libc_hidden_builtin_def (memcpy)

	.rodata
	.align 8
.table:
	data8	0			// dummy entry
	data8 	.loop56 - .loop8
	data8 	.loop56 - .loop16
	data8 	.loop56 - .loop24
	data8	.loop56 - .loop32
	data8	.loop56 - .loop40
	data8	.loop56 - .loop48
	data8	.loop56 - .loop56
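/* The table stores link-time differences rather than absolute
   addresses, so it needs no run-time relocations; the dispatch code
   reconstructs the target as &.loop56 minus the stored offset.  */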