// sum_ppc64x.s

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  4
  5//go:build gc && !purego && (ppc64 || ppc64le)
  6
  7#include "textflag.h"
  8
  9// This was ported from the amd64 implementation.
 10
 11#ifdef GOARCH_ppc64le
 12#define LE_MOVD MOVD
 13#define LE_MOVWZ MOVWZ
 14#define LE_MOVHZ MOVHZ
 15#else
 16#define LE_MOVD MOVDBR
 17#define LE_MOVWZ MOVWBR
 18#define LE_MOVHZ MOVHBR
 19#endif
 20
 21#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
 22	LE_MOVD (msg)( R0), t0; \
 23	LE_MOVD (msg)(R24), t1; \
 24	MOVD $1, t2;     \
 25	ADDC t0, h0, h0; \
 26	ADDE t1, h1, h1; \
 27	ADDE t2, h2;     \
 28	ADD  $16, msg
 29
 30#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
 31	MULLD  r0, h0, t0;  \
 32	MULHDU r0, h0, t1;  \
 33	MULLD  r0, h1, t4;  \
 34	MULHDU r0, h1, t5;  \
 35	ADDC   t4, t1, t1;  \
 36	MULLD  r0, h2, t2;  \
 37	MULHDU r1, h0, t4;  \
 38	MULLD  r1, h0, h0;  \
 39	ADDE   t5, t2, t2;  \
 40	ADDC   h0, t1, t1;  \
 41	MULLD  h2, r1, t3;  \
 42	ADDZE  t4, h0;      \
 43	MULHDU r1, h1, t5;  \
 44	MULLD  r1, h1, t4;  \
 45	ADDC   t4, t2, t2;  \
 46	ADDE   t5, t3, t3;  \
 47	ADDC   h0, t2, t2;  \
 48	MOVD   $-4, t4;     \
 49	ADDZE  t3;          \
 50	RLDICL $0, t2, $62, h2; \
 51	AND    t2, t4, h0;  \
 52	ADDC   t0, h0, h0;  \
 53	ADDE   t3, t1, h1;  \
 54	SLD    $62, t3, t4; \
 55	SRD    $2, t2;      \
 56	ADDZE  h2;          \
 57	OR     t4, t2, t2;  \
 58	SRD    $2, t3;      \
 59	ADDC   t2, h0, h0;  \
 60	ADDE   t3, h1, h1;  \
 61	ADDZE  h2
 62
 63// func update(state *[7]uint64, msg []byte)
 64TEXT ยทupdate(SB), $0-32
 65	MOVD state+0(FP), R3
 66	MOVD msg_base+8(FP), R4
 67	MOVD msg_len+16(FP), R5
 68
 69	MOVD 0(R3), R8   // h0
 70	MOVD 8(R3), R9   // h1
 71	MOVD 16(R3), R10 // h2
 72	MOVD 24(R3), R11 // r0
 73	MOVD 32(R3), R12 // r1
 74
 75	MOVD $8, R24
 76
 77	CMP R5, $16
 78	BLT bytes_between_0_and_15
 79
 80loop:
 81	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
 82
 83	PCALIGN $16
 84multiply:
 85	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
 86	ADD $-16, R5
 87	CMP R5, $16
 88	BGE loop
 89
 90bytes_between_0_and_15:
 91	CMP  R5, $0
 92	BEQ  done
 93	MOVD $0, R16 // h0
 94	MOVD $0, R17 // h1
 95
 96flush_buffer:
 97	CMP R5, $8
 98	BLE just1
 99
100	MOVD $8, R21
101	SUB  R21, R5, R21
102
103	// Greater than 8 -- load the rightmost remaining bytes in msg
104	// and put into R17 (h1)
105	LE_MOVD (R4)(R21), R17
106	MOVD $16, R22
107
108	// Find the offset to those bytes
109	SUB R5, R22, R22
110	SLD $3, R22
111
112	// Shift to get only the bytes in msg
113	SRD R22, R17, R17
114
115	// Put 1 at high end
116	MOVD $1, R23
117	SLD  $3, R21
118	SLD  R21, R23, R23
119	OR   R23, R17, R17
120
121	// Remainder is 8
122	MOVD $8, R5
123
124just1:
125	CMP R5, $8
126	BLT less8
127
128	// Exactly 8
129	LE_MOVD (R4), R16
130
131	CMP R17, $0
132
133	// Check if we've already set R17; if not
134	// set 1 to indicate end of msg.
135	BNE  carry
136	MOVD $1, R17
137	BR   carry
138
139less8:
140	MOVD  $0, R16   // h0
141	MOVD  $0, R22   // shift count
142	CMP   R5, $4
143	BLT   less4
144	LE_MOVWZ (R4), R16
145	ADD   $4, R4
146	ADD   $-4, R5
147	MOVD  $32, R22
148
149less4:
150	CMP   R5, $2
151	BLT   less2
152	LE_MOVHZ (R4), R21
153	SLD   R22, R21, R21
154	OR    R16, R21, R16
155	ADD   $16, R22
156	ADD   $-2, R5
157	ADD   $2, R4
158
159less2:
160	CMP   R5, $0
161	BEQ   insert1
162	MOVBZ (R4), R21
163	SLD   R22, R21, R21
164	OR    R16, R21, R16
165	ADD   $8, R22
166
167insert1:
168	// Insert 1 at end of msg
169	MOVD $1, R21
170	SLD  R22, R21, R21
171	OR   R16, R21, R16
172
173carry:
174	// Add new values to h0, h1, h2
175	ADDC  R16, R8
176	ADDE  R17, R9
177	ADDZE R10, R10
178	MOVD  $16, R5
179	ADD   R5, R4
180	BR    multiply
181
182done:
183	// Save h0, h1, h2 in state
184	MOVD R8, 0(R3)
185	MOVD R9, 8(R3)
186	MOVD R10, 16(R3)
187	RET