sum_loong64.s

  1// Copyright 2025 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5//go:build gc && !purego
  6
  7// func update(state *macState, msg []byte)
  8TEXT ยทupdate(SB), $0-32
  9	MOVV	state+0(FP), R4
 10	MOVV	msg_base+8(FP), R5
 11	MOVV	msg_len+16(FP), R6
 12
 13	MOVV	$0x10, R7
 14
 15	MOVV	(R4), R8	// h0
 16	MOVV	8(R4), R9	// h1
 17	MOVV	16(R4), R10	// h2
 18	MOVV	24(R4), R11	// r0
 19	MOVV	32(R4), R12	// r1
 20
 21	BLT	R6, R7, bytes_between_0_and_15
 22
 23loop:
 24	MOVV	(R5), R14	// msg[0:8]
 25	MOVV	8(R5), R16	// msg[8:16]
 26	ADDV	R14, R8, R8	// h0 (x1 + y1 = z1', if z1' < x1 then z1' overflow)
 27	ADDV	R16, R9, R27
 28	SGTU	R14, R8, R24	// h0.carry
 29	SGTU	R9, R27, R28
 30	ADDV	R27, R24, R9	// h1
 31	SGTU	R27, R9, R24
 32	OR	R24, R28, R24	// h1.carry
 33	ADDV	$0x01, R24, R24
 34	ADDV	R10, R24, R10	// h2
 35
 36	ADDV	$16, R5, R5	// msg = msg[16:]
 37
 38multiply:
 39	MULV	R8, R11, R14	// h0r0.lo
 40	MULHVU	R8, R11, R15	// h0r0.hi
 41	MULV	R9, R11, R13	// h1r0.lo
 42	MULHVU	R9, R11, R16	// h1r0.hi
 43	ADDV	R13, R15, R15
 44	SGTU	R13, R15, R24
 45	ADDV	R24, R16, R16
 46	MULV	R10, R11, R25
 47	ADDV	R16, R25, R25
 48	MULV	R8, R12, R13	// h0r1.lo
 49	MULHVU	R8, R12, R16	// h0r1.hi
 50	ADDV	R13, R15, R15
 51	SGTU	R13, R15, R24
 52	ADDV	R24, R16, R16
 53	MOVV	R16, R8
 54	MULV	R10, R12, R26	// h2r1
 55	MULV	R9, R12, R13	// h1r1.lo
 56	MULHVU	R9, R12, R16	// h1r1.hi
 57	ADDV	R13, R25, R25
 58	ADDV	R16, R26, R27
 59	SGTU	R13, R25, R24
 60	ADDV	R27, R24, R26
 61	ADDV	R8, R25, R25
 62	SGTU	R8, R25, R24
 63	ADDV	R24, R26, R26
 64	AND	$3, R25, R10
 65	AND	$-4, R25, R17
 66	ADDV	R17, R14, R8
 67	ADDV	R26, R15, R27
 68	SGTU	R17, R8, R24
 69	SGTU	R26, R27, R28
 70	ADDV	R27, R24, R9
 71	SGTU	R27, R9, R24
 72	OR	R24, R28, R24
 73	ADDV	R24, R10, R10
 74	SLLV	$62, R26, R27
 75	SRLV	$2, R25, R28
 76	SRLV	$2, R26, R26
 77	OR	R27, R28, R25
 78	ADDV	R25, R8, R8
 79	ADDV	R26, R9, R27
 80	SGTU	R25, R8, R24
 81	SGTU	R26, R27, R28
 82	ADDV	R27, R24, R9
 83	SGTU	R27, R9, R24
 84	OR	R24, R28, R24
 85	ADDV	R24, R10, R10
 86
 87	SUBV	$16, R6, R6
 88	BGE	R6, R7, loop
 89
 90bytes_between_0_and_15:
 91	BEQ	R6, R0, done
 92	MOVV	$1, R14
 93	XOR	R15, R15
 94	ADDV	R6, R5, R5
 95
 96flush_buffer:
 97	MOVBU	-1(R5), R25
 98	SRLV	$56, R14, R24
 99	SLLV	$8, R15, R28
100	SLLV	$8, R14, R14
101	OR	R24, R28, R15
102	XOR	R25, R14, R14
103	SUBV	$1, R6, R6
104	SUBV	$1, R5, R5
105	BNE	R6, R0, flush_buffer
106
107	ADDV	R14, R8, R8
108	SGTU	R14, R8, R24
109	ADDV	R15, R9, R27
110	SGTU	R15, R27, R28
111	ADDV	R27, R24, R9
112	SGTU	R27, R9, R24
113	OR	R24, R28, R24
114	ADDV	R10, R24, R10
115
116	MOVV	$16, R6
117	JMP	multiply
118
119done:
120	MOVV	R8, (R4)
121	MOVV	R9, 8(R4)
122	MOVV	R10, 16(R4)
123	RET