1package interpreter
2
3import (
4 "context"
5 "encoding/binary"
6 "errors"
7 "fmt"
8 "math"
9 "math/bits"
10 "sync"
11 "unsafe"
12
13 "github.com/tetratelabs/wazero/api"
14 "github.com/tetratelabs/wazero/experimental"
15 "github.com/tetratelabs/wazero/internal/expctxkeys"
16 "github.com/tetratelabs/wazero/internal/filecache"
17 "github.com/tetratelabs/wazero/internal/internalapi"
18 "github.com/tetratelabs/wazero/internal/moremath"
19 "github.com/tetratelabs/wazero/internal/wasm"
20 "github.com/tetratelabs/wazero/internal/wasmdebug"
21 "github.com/tetratelabs/wazero/internal/wasmruntime"
22)
23
24// callStackCeiling is the maximum WebAssembly call frame stack height. This allows wazero to raise
25// wasm.ErrCallStackOverflow instead of overflowing the Go runtime.
26//
// The default value should suffice for most use cases. Those wishing to change this can do so via `go build -ldflags`.
28var callStackCeiling = 2000
29
30// engine is an interpreter implementation of wasm.Engine
31type engine struct {
32 enabledFeatures api.CoreFeatures
	compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mux.
34 mux sync.RWMutex
35}
36
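// NewEngine returns a wasm.Engine that executes functions by interpreting the lowered
// interpreterir operations. A typical wiring sketch (not part of this package): the
// public wazero API selects this engine via the interpreter runtime configuration, e.g.
//
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter())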
37func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
38 return &engine{
39 enabledFeatures: enabledFeatures,
40 compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
41 }
42}
43
44// Close implements the same method as documented on wasm.Engine.
45func (e *engine) Close() (err error) {
46 return
47}
48
49// CompiledModuleCount implements the same method as documented on wasm.Engine.
50func (e *engine) CompiledModuleCount() uint32 {
51 return uint32(len(e.compiledFunctions))
52}
53
54// DeleteCompiledModule implements the same method as documented on wasm.Engine.
55func (e *engine) DeleteCompiledModule(m *wasm.Module) {
56 e.deleteCompiledFunctions(m)
57}
58
59func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
60 e.mux.Lock()
61 defer e.mux.Unlock()
62 delete(e.compiledFunctions, module.ID)
63}
64
65func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
66 e.mux.Lock()
67 defer e.mux.Unlock()
68 e.compiledFunctions[module.ID] = fs
69}
70
71func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
72 e.mux.RLock()
73 defer e.mux.RUnlock()
74 fs, ok = e.compiledFunctions[module.ID]
75 return
76}
77
78// moduleEngine implements wasm.ModuleEngine
79type moduleEngine struct {
	// functions are the functions of a module instance.
	// The index is module instance-scoped.
82 functions []function
83
	// parentEngine holds the *engine from which this module engine was created.
85 parentEngine *engine
86}
87
88// GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
89func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) {
	panic("BUG: GetGlobalValue should never be called in interpreter mode")
91}
92
93// SetGlobalValue implements the same method as documented on wasm.ModuleEngine.
94func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) {
	panic("BUG: SetGlobalValue should never be called in interpreter mode")
96}
97
98// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
99func (e *moduleEngine) OwnsGlobals() bool { return false }
100
101// MemoryGrown implements wasm.ModuleEngine.
102func (e *moduleEngine) MemoryGrown() {}
103
// callEngine holds context per moduleEngine.Call, and is shared across all the
105// function calls originating from the same moduleEngine.Call execution.
106//
107// This implements api.Function.
108type callEngine struct {
109 internalapi.WazeroOnlyType
110
111 // stack contains the operands.
112 // Note that all the values are represented as uint64.
113 stack []uint64
114
115 // frames are the function call stack.
116 frames []*callFrame
117
118 // f is the initial function for this call engine.
119 f *function
120
	// stackIterator is used by listeners to walk the frames and the stack.
122 stackIterator stackIterator
123}
124
125func (e *moduleEngine) newCallEngine(compiled *function) *callEngine {
126 return &callEngine{f: compiled}
127}
128
129func (ce *callEngine) pushValue(v uint64) {
130 ce.stack = append(ce.stack, v)
131}
132
133func (ce *callEngine) pushValues(v []uint64) {
134 ce.stack = append(ce.stack, v...)
135}
136
137func (ce *callEngine) popValue() (v uint64) {
	// No need to check the stack bound, as we can assume that all the operations are valid
	// thanks to validateFunction at the module validation phase and the interpreterir
	// translation before compilation.
144 stackTopIndex := len(ce.stack) - 1
145 v = ce.stack[stackTopIndex]
146 ce.stack = ce.stack[:stackTopIndex]
147 return
148}
149
150func (ce *callEngine) popValues(v []uint64) {
151 stackTopIndex := len(ce.stack) - len(v)
152 copy(v, ce.stack[stackTopIndex:])
153 ce.stack = ce.stack[:stackTopIndex]
154}
155
// peekValues returns the top count values on the stack without popping them.
157func (ce *callEngine) peekValues(count int) []uint64 {
158 if count == 0 {
159 return nil
160 }
161 stackLen := len(ce.stack)
162 return ce.stack[stackLen-count : stackLen]
163}
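
// stackOpsExample is an illustrative sketch (not used by the interpreter itself) of the
// operand-stack discipline: every value, including 32-bit ones, is stored as a uint64,
// and values are popped in LIFO order.
func stackOpsExample() (top uint64, rest []uint64) {
	ce := &callEngine{}
	ce.pushValue(1)
	ce.pushValues([]uint64{2, 3}) // The stack is now [1, 2, 3], with 3 on top.
	top = ce.popValue()           // top == 3
	rest = make([]uint64, 2)
	ce.popValues(rest) // rest == [1, 2], and the stack is empty again.
	return
}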
164
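// drop removes from the stack the values in the inclusive range encoded in raw.
// Offsets are counted from the top of the stack, and a Start of -1 means nothing is
// dropped. For example, with a stack [a, b, c, d] (d on top), the range
// {Start: 1, End: 2} removes c and b, leaving [a, d].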
165func (ce *callEngine) drop(raw uint64) {
166 r := inclusiveRangeFromU64(raw)
167 if r.Start == -1 {
168 return
169 } else if r.Start == 0 {
170 ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End]
171 } else {
172 newStack := ce.stack[:int32(len(ce.stack))-1-r.End]
173 newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...)
174 ce.stack = newStack
175 }
176}
177
178func (ce *callEngine) pushFrame(frame *callFrame) {
179 if callStackCeiling <= len(ce.frames) {
180 panic(wasmruntime.ErrRuntimeStackOverflow)
181 }
182 ce.frames = append(ce.frames, frame)
183}
184
185func (ce *callEngine) popFrame() (frame *callFrame) {
186 // No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at
187 // module validation phase and interpreterir translation before compilation.
188 oneLess := len(ce.frames) - 1
189 frame = ce.frames[oneLess]
190 ce.frames = ce.frames[:oneLess]
191 return
192}
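
// frameOpsExample is an illustrative sketch (not used by the interpreter itself) of
// call-frame bookkeeping: one callFrame is pushed per function call, and pushFrame
// panics with wasmruntime.ErrRuntimeStackOverflow once callStackCeiling frames are
// live, which Call converts into an error via recoverOnCall.
func frameOpsExample() {
	ce := &callEngine{}
	ce.pushFrame(&callFrame{base: len(ce.stack)})
	_ = ce.popFrame()
}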
193
194type callFrame struct {
195 // pc is the program counter representing the current position in code.body.
196 pc uint64
197 // f is the compiled function used in this function frame.
198 f *function
	// base is the length of the operand stack at the time this frame was pushed,
	// i.e. the index at which this function's values begin.
201 base int
202}
203
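// compiledFunction is the engine-level, instance-independent result of lowering a
// single Wasm function (or wrapping a host Go function) in CompileModule. It is
// shared by every instance of the same module.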
204type compiledFunction struct {
205 source *wasm.Module
206 body []unionOperation
207 listener experimental.FunctionListener
208 offsetsInWasmBinary []uint64
209 hostFn interface{}
210 ensureTermination bool
211 index wasm.Index
212}
213
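// function is the per-module-instance view of a compiledFunction: NewModuleEngine
// binds the shared compiled code to a particular wasm.ModuleInstance and its
// resolved function type ID at instantiation time.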
214type function struct {
215 funcType *wasm.FunctionType
216 moduleInstance *wasm.ModuleInstance
217 typeID wasm.FunctionTypeID
218 parent *compiledFunction
219}
220
// functionFromUintptr resurrects the original *function from the given uintptr,
// which comes from either a funcref table or an OpcodeRefFunc instruction.
223func functionFromUintptr(ptr uintptr) *function {
	// Wrap ptr in a double pointer to avoid the unsafe access being flagged by the race detector.
	//
	// For example, if we had (*function)(unsafe.Pointer(ptr)) instead, the race detector's "checkptr"
	// subroutine would warn with "checkptr: pointer arithmetic result points to invalid allocation".
	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
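	//
	// For illustration (a sketch, not used directly by the engine): the inverse is simply
	// taking the address of a function, as FunctionInstanceReference and
	// operationKindRefFunc do, so the following round-trips:
	//
	//	var f function
	//	ptr := uintptr(unsafe.Pointer(&f))
	//	same := functionFromUintptr(ptr) // same == &f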
229 var wrapped *uintptr = &ptr
230 return *(**function)(unsafe.Pointer(wrapped))
231}
232
233type snapshot struct {
234 stack []uint64
235 frames []*callFrame
236 pc uint64
237
238 ret []uint64
239
240 ce *callEngine
241}
242
243// Snapshot implements the same method as documented on experimental.Snapshotter.
244func (ce *callEngine) Snapshot() experimental.Snapshot {
245 stack := make([]uint64, len(ce.stack))
246 copy(stack, ce.stack)
247
248 frames := make([]*callFrame, len(ce.frames))
249 copy(frames, ce.frames)
250
251 return &snapshot{
252 stack: stack,
253 frames: frames,
254 ce: ce,
255 }
256}
257
258// Restore implements the same method as documented on experimental.Snapshot.
259func (s *snapshot) Restore(ret []uint64) {
260 s.ret = ret
261 panic(s)
262}
263
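// doRestore is invoked from the recover handler installed around operationKindCall in
// callNativeFunc (when the snapshotter is enabled): Restore panics with the snapshot,
// and that handler calls doRestore to reinstate the saved stack and frames before
// execution resumes.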
264func (s *snapshot) doRestore() {
265 ce := s.ce
266
267 ce.stack = s.stack
268 ce.frames = s.frames
269 ce.frames[len(ce.frames)-1].pc = s.pc
270
271 copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret)
272}
273
// Error implements the error interface.
275func (s *snapshot) Error() string {
276 return "unhandled snapshot restore, this generally indicates restore was called from a different " +
277 "exported function invocation than snapshot"
278}
279
280// stackIterator implements experimental.StackIterator.
281type stackIterator struct {
282 stack []uint64
283 frames []*callFrame
284 started bool
285 fn *function
286 pc uint64
287}
288
289func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) {
290 si.fn = f
291 si.pc = 0
292 si.stack = stack
293 si.frames = frames
294 si.started = false
295}
296
297func (si *stackIterator) clear() {
298 si.stack = nil
299 si.frames = nil
300 si.started = false
301 si.fn = nil
302}
303
304// Next implements the same method as documented on experimental.StackIterator.
305func (si *stackIterator) Next() bool {
306 if !si.started {
307 si.started = true
308 return true
309 }
310
311 if len(si.frames) == 0 {
312 return false
313 }
314
315 frame := si.frames[len(si.frames)-1]
316 si.stack = si.stack[:frame.base]
317 si.fn = frame.f
318 si.pc = frame.pc
319 si.frames = si.frames[:len(si.frames)-1]
320 return true
321}
322
323// Function implements the same method as documented on
324// experimental.StackIterator.
325func (si *stackIterator) Function() experimental.InternalFunction {
326 return internalFunction{si.fn}
327}
328
329// ProgramCounter implements the same method as documented on
330// experimental.StackIterator.
331func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
332 return experimental.ProgramCounter(si.pc)
333}
334
335// internalFunction implements experimental.InternalFunction.
336type internalFunction struct{ *function }
337
338// Definition implements the same method as documented on
339// experimental.InternalFunction.
340func (f internalFunction) Definition() api.FunctionDefinition {
341 return f.definition()
342}
343
344// SourceOffsetForPC implements the same method as documented on
345// experimental.InternalFunction.
346func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
347 offsetsMap := f.parent.offsetsInWasmBinary
348 if uint64(pc) < uint64(len(offsetsMap)) {
349 return offsetsMap[pc]
350 }
351 return 0
352}
353
// The interpreter doesn't maintain call frames on the operand stack, so pass zero as the size to the IR compiler.
355const callFrameStackSize = 0
356
357// CompileModule implements the same method as documented on wasm.Engine.
358func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
359 if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
360 return nil
361 }
362
363 funcs := make([]compiledFunction, len(module.FunctionSection))
364 irCompiler, err := newCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination)
365 if err != nil {
366 return err
367 }
368 imported := module.ImportFunctionCount
369 for i := range module.CodeSection {
370 var lsn experimental.FunctionListener
371 if i < len(listeners) {
372 lsn = listeners[i]
373 }
374
375 compiled := &funcs[i]
		// If this is a host function, there's nothing to do: the interpreter's runtime
		// representation of a host function is its Go function itself, as opposed to Wasm
		// functions, which need to be compiled down to interpreterir operations.
379 if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil {
380 compiled.hostFn = codeSeg.GoFunc
381 } else {
382 ir, err := irCompiler.Next()
383 if err != nil {
384 return err
385 }
386 err = e.lowerIR(ir, compiled)
387 if err != nil {
388 def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount)
389 return fmt.Errorf("failed to lower func[%s] to interpreterir: %w", def.DebugName(), err)
390 }
391 }
392 compiled.source = module
393 compiled.ensureTermination = ensureTermination
394 compiled.listener = lsn
395 compiled.index = imported + uint32(i)
396 }
397 e.addCompiledFunctions(module, funcs)
398 return nil
399}
400
401// NewModuleEngine implements the same method as documented on wasm.Engine.
402func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
403 me := &moduleEngine{
404 parentEngine: e,
405 functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
406 }
407
408 codes, ok := e.getCompiledFunctions(module)
409 if !ok {
410 return nil, errors.New("source module must be compiled before instantiation")
411 }
412
413 for i := range codes {
414 c := &codes[i]
415 offset := i + int(module.ImportFunctionCount)
416 typeIndex := module.FunctionSection[i]
417 me.functions[offset] = function{
418 moduleInstance: instance,
419 typeID: instance.TypeIDs[typeIndex],
420 funcType: &module.TypeSection[typeIndex],
421 parent: c,
422 }
423 }
424 return me, nil
425}
426
// lowerIR lowers the interpreterir operations into an engine-friendly compiledFunction.
428func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
429 // Copy the body from the result.
430 ret.body = make([]unionOperation, len(ir.Operations))
431 copy(ret.body, ir.Operations)
432 // Also copy the offsets if necessary.
433 if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
434 ret.offsetsInWasmBinary = make([]uint64, len(offsets))
435 copy(ret.offsetsInWasmBinary, offsets)
436 }
437
438 labelAddressResolutions := [labelKindNum][]uint64{}
439
	// First, iterate over all labels and resolve their addresses.
441 for i := range ret.body {
442 op := &ret.body[i]
443 switch op.Kind {
444 case operationKindLabel:
445 label := label(op.U1)
446 address := uint64(i)
447
448 kind, fid := label.Kind(), label.FrameID()
449 frameToAddresses := labelAddressResolutions[label.Kind()]
450 // Expand the slice if necessary.
451 if diff := fid - len(frameToAddresses) + 1; diff > 0 {
452 for j := 0; j < diff; j++ {
453 frameToAddresses = append(frameToAddresses, 0)
454 }
455 }
456 frameToAddresses[fid] = address
457 labelAddressResolutions[kind] = frameToAddresses
458 }
459 }
460
	// Then, resolve each branch target label to its index in the body.
462 for i := range ret.body {
463 op := &ret.body[i]
464 switch op.Kind {
465 case operationKindBr:
466 e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions)
467 case operationKindBrIf:
468 e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions)
469 e.setLabelAddress(&op.U2, label(op.U2), labelAddressResolutions)
470 case operationKindBrTable:
471 for j := 0; j < len(op.Us); j += 2 {
472 target := op.Us[j]
473 e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions)
474 }
475 }
476 }
477 return nil
478}
479
480func (e *engine) setLabelAddress(op *uint64, label label, labelAddressResolutions [labelKindNum][]uint64) {
481 if label.IsReturnTarget() {
		// Jump past the end of the body, i.e. return from the function.
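		// (The interpreter loop in callNativeFunc only runs while frame.pc < len(body),
		// so branching to math.MaxUint64 ends the loop.)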
483 *op = math.MaxUint64
484 } else {
485 *op = labelAddressResolutions[label.Kind()][label.FrameID()]
486 }
487}
488
489// ResolveImportedFunction implements wasm.ModuleEngine.
490func (e *moduleEngine) ResolveImportedFunction(index, descFunc, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
491 imported := importedModuleEngine.(*moduleEngine)
492 e.functions[index] = imported.functions[indexInImportedModule]
493}
494
495// ResolveImportedMemory implements wasm.ModuleEngine.
496func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {}
497
498// DoneInstantiation implements wasm.ModuleEngine.
499func (e *moduleEngine) DoneInstantiation() {}
500
501// FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine.
502func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
503 return uintptr(unsafe.Pointer(&e.functions[funcIndex]))
504}
505
506// NewFunction implements the same method as documented on wasm.ModuleEngine.
507func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) {
	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
	// code on close aren't locked, nor is this read.
510 compiled := &e.functions[index]
511 return e.newCallEngine(compiled)
512}
513
514// LookupFunction implements the same method as documented on wasm.ModuleEngine.
515func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
516 if tableOffset >= uint32(len(t.References)) {
517 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
518 }
519 rawPtr := t.References[tableOffset]
520 if rawPtr == 0 {
521 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
522 }
523
524 tf := functionFromUintptr(rawPtr)
525 if tf.typeID != typeId {
526 panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
527 }
528 return tf.moduleInstance, tf.parent.index
529}
530
531// Definition implements the same method as documented on api.Function.
532func (ce *callEngine) Definition() api.FunctionDefinition {
533 return ce.f.definition()
534}
535
536func (f *function) definition() api.FunctionDefinition {
537 compiled := f.parent
538 return compiled.source.FunctionDefinition(compiled.index)
539}
540
541// Call implements the same method as documented on api.Function.
542func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) {
543 ft := ce.f.funcType
544 if n := ft.ParamNumInUint64; n != len(params) {
545 return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params))
546 }
547 return ce.call(ctx, params, nil)
548}
549
550// CallWithStack implements the same method as documented on api.Function.
551func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error {
552 params, results, err := wasm.SplitCallStack(ce.f.funcType, stack)
553 if err != nil {
554 return err
555 }
556 _, err = ce.call(ctx, params, results)
557 return err
558}
559
560func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) {
561 m := ce.f.moduleInstance
562 if ce.f.parent.ensureTermination {
563 select {
564 case <-ctx.Done():
565 // If the provided context is already done, close the call context
566 // and return the error.
567 m.CloseWithCtxErr(ctx)
568 return nil, m.FailIfClosed()
569 default:
570 }
571 }
572
573 if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
574 ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, ce)
575 }
576
577 defer func() {
578 // If the module closed during the call, and the call didn't err for another reason, set an ExitError.
579 if err == nil {
580 err = m.FailIfClosed()
581 }
582 // TODO: ^^ Will not fail if the function was imported from a closed module.
583
584 if v := recover(); v != nil {
585 err = ce.recoverOnCall(ctx, m, v)
586 }
587 }()
588
589 ce.pushValues(params)
590
591 if ce.f.parent.ensureTermination {
592 done := m.CloseModuleOnCanceledOrTimeout(ctx)
593 defer done()
594 }
595
596 ce.callFunction(ctx, m, ce.f)
597
598 // This returns a safe copy of the results, instead of a slice view. If we
599 // returned a re-slice, the caller could accidentally or purposefully
600 // corrupt the stack of subsequent calls.
601 ft := ce.f.funcType
602 if results == nil && ft.ResultNumInUint64 > 0 {
603 results = make([]uint64, ft.ResultNumInUint64)
604 }
605 ce.popValues(results)
606 return results, nil
607}
608
609// functionListenerInvocation captures arguments needed to perform function
610// listener invocations when unwinding the call stack.
611type functionListenerInvocation struct {
612 experimental.FunctionListener
613 def api.FunctionDefinition
614}
615
// recoverOnCall takes the recovered value `v` and wraps it with the call frame stack
// traces. It also resets the state of the callEngine so that it can be reused for
// subsequent calls.
619func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) {
620 if s, ok := v.(*snapshot); ok {
		// An unhandled snapshot was created by a different call engine, possibly from a nested
		// wasm invocation; let it propagate up to be handled by the caller.
623 panic(s)
624 }
625
626 builder := wasmdebug.NewErrorBuilder()
627 frameCount := len(ce.frames)
628 functionListeners := make([]functionListenerInvocation, 0, 16)
629
630 if frameCount > wasmdebug.MaxFrames {
631 frameCount = wasmdebug.MaxFrames
632 }
633 for i := 0; i < frameCount; i++ {
634 frame := ce.popFrame()
635 f := frame.f
636 def := f.definition()
637 var sources []string
638 if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 {
639 sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc])
640 }
641 builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
642 if f.parent.listener != nil {
643 functionListeners = append(functionListeners, functionListenerInvocation{
644 FunctionListener: f.parent.listener,
645 def: f.definition(),
646 })
647 }
648 }
649
650 err = builder.FromRecovered(v)
651 for i := range functionListeners {
652 functionListeners[i].Abort(ctx, m, functionListeners[i].def, err)
653 }
654
	// Reset the stack and frames so the callEngine can be reused.
656 ce.stack, ce.frames = ce.stack[:0], ce.frames[:0]
657 return
658}
659
660func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) {
661 if f.parent.hostFn != nil {
662 ce.callGoFuncWithStack(ctx, m, f)
663 } else if lsn := f.parent.listener; lsn != nil {
664 ce.callNativeFuncWithListener(ctx, m, f, lsn)
665 } else {
666 ce.callNativeFunc(ctx, m, f)
667 }
668}
669
670func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) {
671 typ := f.funcType
672 lsn := f.parent.listener
673 if lsn != nil {
674 params := stack[:typ.ParamNumInUint64]
675 ce.stackIterator.reset(ce.stack, ce.frames, f)
676 lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator)
677 ce.stackIterator.clear()
678 }
679 frame := &callFrame{f: f, base: len(ce.stack)}
680 ce.pushFrame(frame)
681
682 fn := f.parent.hostFn
683 switch fn := fn.(type) {
684 case api.GoModuleFunction:
685 fn.Call(ctx, m, stack)
686 case api.GoFunction:
687 fn.Call(ctx, stack)
688 }
689
690 ce.popFrame()
691 if lsn != nil {
		// TODO: This doesn't get the error because panics are used to propagate errors.
693 results := stack[:typ.ResultNumInUint64]
694 lsn.After(ctx, m, f.definition(), results)
695 }
696}
697
698func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) {
699 frame := &callFrame{f: f, base: len(ce.stack)}
700 moduleInst := f.moduleInstance
701 functions := moduleInst.Engine.(*moduleEngine).functions
702 memoryInst := moduleInst.MemoryInstance
703 globals := moduleInst.Globals
704 tables := moduleInst.Tables
705 typeIDs := moduleInst.TypeIDs
706 dataInstances := moduleInst.DataInstances
707 elementInstances := moduleInst.ElementInstances
708 ce.pushFrame(frame)
709 body := frame.f.parent.body
710 bodyLen := uint64(len(body))
711 for frame.pc < bodyLen {
712 op := &body[frame.pc]
		// TODO: add a description of each operation/case, e.g. how many arguments
		// it consumes and how it modifies the stack.
716 switch op.Kind {
717 case operationKindBuiltinFunctionCheckExitCode:
718 if err := m.FailIfClosed(); err != nil {
719 panic(err)
720 }
721 frame.pc++
722 case operationKindUnreachable:
723 panic(wasmruntime.ErrRuntimeUnreachable)
724 case operationKindBr:
725 frame.pc = op.U1
726 case operationKindBrIf:
727 if ce.popValue() > 0 {
728 ce.drop(op.U3)
729 frame.pc = op.U1
730 } else {
731 frame.pc = op.U2
732 }
733 case operationKindBrTable:
734 v := ce.popValue()
735 defaultAt := uint64(len(op.Us))/2 - 1
736 if v > defaultAt {
737 v = defaultAt
738 }
739 v *= 2
740 ce.drop(op.Us[v+1])
741 frame.pc = op.Us[v]
742 case operationKindCall:
743 func() {
744 if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil {
745 defer func() {
746 if r := recover(); r != nil {
747 if s, ok := r.(*snapshot); ok && s.ce == ce {
748 s.doRestore()
749 frame = ce.frames[len(ce.frames)-1]
750 body = frame.f.parent.body
751 bodyLen = uint64(len(body))
752 } else {
753 panic(r)
754 }
755 }
756 }()
757 }
758 ce.callFunction(ctx, f.moduleInstance, &functions[op.U1])
759 }()
760 frame.pc++
761 case operationKindCallIndirect:
762 offset := ce.popValue()
763 table := tables[op.U2]
764 if offset >= uint64(len(table.References)) {
765 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
766 }
767 rawPtr := table.References[offset]
768 if rawPtr == 0 {
769 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
770 }
771
772 tf := functionFromUintptr(rawPtr)
773 if tf.typeID != typeIDs[op.U1] {
774 panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
775 }
776
777 ce.callFunction(ctx, f.moduleInstance, tf)
778 frame.pc++
779 case operationKindDrop:
780 ce.drop(op.U1)
781 frame.pc++
782 case operationKindSelect:
783 c := ce.popValue()
784 if op.B3 { // Target is vector.
785 x2Hi, x2Lo := ce.popValue(), ce.popValue()
786 if c == 0 {
					_, _ = ce.popValue(), ce.popValue() // discard x1's lo and hi bits.
788 ce.pushValue(x2Lo)
789 ce.pushValue(x2Hi)
790 }
791 } else {
792 v2 := ce.popValue()
793 if c == 0 {
794 _ = ce.popValue()
795 ce.pushValue(v2)
796 }
797 }
798 frame.pc++
799 case operationKindPick:
800 index := len(ce.stack) - 1 - int(op.U1)
801 ce.pushValue(ce.stack[index])
802 if op.B3 { // V128 value target.
803 ce.pushValue(ce.stack[index+1])
804 }
805 frame.pc++
806 case operationKindSet:
807 if op.B3 { // V128 value target.
808 lowIndex := len(ce.stack) - 1 - int(op.U1)
809 highIndex := lowIndex + 1
810 hi, lo := ce.popValue(), ce.popValue()
811 ce.stack[lowIndex], ce.stack[highIndex] = lo, hi
812 } else {
813 index := len(ce.stack) - 1 - int(op.U1)
814 ce.stack[index] = ce.popValue()
815 }
816 frame.pc++
817 case operationKindGlobalGet:
818 g := globals[op.U1]
819 ce.pushValue(g.Val)
820 if g.Type.ValType == wasm.ValueTypeV128 {
821 ce.pushValue(g.ValHi)
822 }
823 frame.pc++
824 case operationKindGlobalSet:
825 g := globals[op.U1]
826 if g.Type.ValType == wasm.ValueTypeV128 {
827 g.ValHi = ce.popValue()
828 }
829 g.Val = ce.popValue()
830 frame.pc++
831 case operationKindLoad:
832 offset := ce.popMemoryOffset(op)
833 switch unsignedType(op.B1) {
834 case unsignedTypeI32, unsignedTypeF32:
835 if val, ok := memoryInst.ReadUint32Le(offset); !ok {
836 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
837 } else {
838 ce.pushValue(uint64(val))
839 }
840 case unsignedTypeI64, unsignedTypeF64:
841 if val, ok := memoryInst.ReadUint64Le(offset); !ok {
842 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
843 } else {
844 ce.pushValue(val)
845 }
846 }
847 frame.pc++
848 case operationKindLoad8:
849 val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op))
850 if !ok {
851 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
852 }
853
854 switch signedInt(op.B1) {
855 case signedInt32:
856 ce.pushValue(uint64(uint32(int8(val))))
857 case signedInt64:
858 ce.pushValue(uint64(int8(val)))
859 case signedUint32, signedUint64:
860 ce.pushValue(uint64(val))
861 }
862 frame.pc++
863 case operationKindLoad16:
865 val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op))
866 if !ok {
867 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
868 }
869
870 switch signedInt(op.B1) {
871 case signedInt32:
872 ce.pushValue(uint64(uint32(int16(val))))
873 case signedInt64:
874 ce.pushValue(uint64(int16(val)))
875 case signedUint32, signedUint64:
876 ce.pushValue(uint64(val))
877 }
878 frame.pc++
879 case operationKindLoad32:
880 val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op))
881 if !ok {
882 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
883 }
884
885 if op.B1 == 1 { // Signed
886 ce.pushValue(uint64(int32(val)))
887 } else {
888 ce.pushValue(uint64(val))
889 }
890 frame.pc++
891 case operationKindStore:
892 val := ce.popValue()
893 offset := ce.popMemoryOffset(op)
894 switch unsignedType(op.B1) {
895 case unsignedTypeI32, unsignedTypeF32:
896 if !memoryInst.WriteUint32Le(offset, uint32(val)) {
897 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
898 }
899 case unsignedTypeI64, unsignedTypeF64:
900 if !memoryInst.WriteUint64Le(offset, val) {
901 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
902 }
903 }
904 frame.pc++
905 case operationKindStore8:
906 val := byte(ce.popValue())
907 offset := ce.popMemoryOffset(op)
908 if !memoryInst.WriteByte(offset, val) {
909 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
910 }
911 frame.pc++
912 case operationKindStore16:
913 val := uint16(ce.popValue())
914 offset := ce.popMemoryOffset(op)
915 if !memoryInst.WriteUint16Le(offset, val) {
916 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
917 }
918 frame.pc++
919 case operationKindStore32:
920 val := uint32(ce.popValue())
921 offset := ce.popMemoryOffset(op)
922 if !memoryInst.WriteUint32Le(offset, val) {
923 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
924 }
925 frame.pc++
926 case operationKindMemorySize:
927 ce.pushValue(uint64(memoryInst.Pages()))
928 frame.pc++
929 case operationKindMemoryGrow:
930 n := ce.popValue()
931 if res, ok := memoryInst.Grow(uint32(n)); !ok {
932 ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer.
933 } else {
934 ce.pushValue(uint64(res))
935 }
936 frame.pc++
937 case operationKindConstI32, operationKindConstI64,
938 operationKindConstF32, operationKindConstF64:
939 ce.pushValue(op.U1)
940 frame.pc++
941 case operationKindEq:
942 var b bool
943 switch unsignedType(op.B1) {
944 case unsignedTypeI32:
945 v2, v1 := ce.popValue(), ce.popValue()
946 b = uint32(v1) == uint32(v2)
947 case unsignedTypeI64:
948 v2, v1 := ce.popValue(), ce.popValue()
949 b = v1 == v2
950 case unsignedTypeF32:
951 v2, v1 := ce.popValue(), ce.popValue()
952 b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1))
953 case unsignedTypeF64:
954 v2, v1 := ce.popValue(), ce.popValue()
955 b = math.Float64frombits(v2) == math.Float64frombits(v1)
956 }
957 if b {
958 ce.pushValue(1)
959 } else {
960 ce.pushValue(0)
961 }
962 frame.pc++
963 case operationKindNe:
964 var b bool
965 switch unsignedType(op.B1) {
966 case unsignedTypeI32, unsignedTypeI64:
967 v2, v1 := ce.popValue(), ce.popValue()
968 b = v1 != v2
969 case unsignedTypeF32:
970 v2, v1 := ce.popValue(), ce.popValue()
971 b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1))
972 case unsignedTypeF64:
973 v2, v1 := ce.popValue(), ce.popValue()
974 b = math.Float64frombits(v2) != math.Float64frombits(v1)
975 }
976 if b {
977 ce.pushValue(1)
978 } else {
979 ce.pushValue(0)
980 }
981 frame.pc++
982 case operationKindEqz:
983 if ce.popValue() == 0 {
984 ce.pushValue(1)
985 } else {
986 ce.pushValue(0)
987 }
988 frame.pc++
989 case operationKindLt:
990 v2 := ce.popValue()
991 v1 := ce.popValue()
992 var b bool
993 switch signedType(op.B1) {
994 case signedTypeInt32:
995 b = int32(v1) < int32(v2)
996 case signedTypeInt64:
997 b = int64(v1) < int64(v2)
998 case signedTypeUint32, signedTypeUint64:
999 b = v1 < v2
1000 case signedTypeFloat32:
1001 b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2))
1002 case signedTypeFloat64:
1003 b = math.Float64frombits(v1) < math.Float64frombits(v2)
1004 }
1005 if b {
1006 ce.pushValue(1)
1007 } else {
1008 ce.pushValue(0)
1009 }
1010 frame.pc++
1011 case operationKindGt:
1012 v2 := ce.popValue()
1013 v1 := ce.popValue()
1014 var b bool
1015 switch signedType(op.B1) {
1016 case signedTypeInt32:
1017 b = int32(v1) > int32(v2)
1018 case signedTypeInt64:
1019 b = int64(v1) > int64(v2)
1020 case signedTypeUint32, signedTypeUint64:
1021 b = v1 > v2
1022 case signedTypeFloat32:
1023 b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2))
1024 case signedTypeFloat64:
1025 b = math.Float64frombits(v1) > math.Float64frombits(v2)
1026 }
1027 if b {
1028 ce.pushValue(1)
1029 } else {
1030 ce.pushValue(0)
1031 }
1032 frame.pc++
1033 case operationKindLe:
1034 v2 := ce.popValue()
1035 v1 := ce.popValue()
1036 var b bool
1037 switch signedType(op.B1) {
1038 case signedTypeInt32:
1039 b = int32(v1) <= int32(v2)
1040 case signedTypeInt64:
1041 b = int64(v1) <= int64(v2)
1042 case signedTypeUint32, signedTypeUint64:
1043 b = v1 <= v2
1044 case signedTypeFloat32:
1045 b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2))
1046 case signedTypeFloat64:
1047 b = math.Float64frombits(v1) <= math.Float64frombits(v2)
1048 }
1049 if b {
1050 ce.pushValue(1)
1051 } else {
1052 ce.pushValue(0)
1053 }
1054 frame.pc++
1055 case operationKindGe:
1056 v2 := ce.popValue()
1057 v1 := ce.popValue()
1058 var b bool
1059 switch signedType(op.B1) {
1060 case signedTypeInt32:
1061 b = int32(v1) >= int32(v2)
1062 case signedTypeInt64:
1063 b = int64(v1) >= int64(v2)
1064 case signedTypeUint32, signedTypeUint64:
1065 b = v1 >= v2
1066 case signedTypeFloat32:
1067 b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2))
1068 case signedTypeFloat64:
1069 b = math.Float64frombits(v1) >= math.Float64frombits(v2)
1070 }
1071 if b {
1072 ce.pushValue(1)
1073 } else {
1074 ce.pushValue(0)
1075 }
1076 frame.pc++
1077 case operationKindAdd:
1078 v2 := ce.popValue()
1079 v1 := ce.popValue()
1080 switch unsignedType(op.B1) {
1081 case unsignedTypeI32:
1082 v := uint32(v1) + uint32(v2)
1083 ce.pushValue(uint64(v))
1084 case unsignedTypeI64:
1085 ce.pushValue(v1 + v2)
1086 case unsignedTypeF32:
1087 ce.pushValue(addFloat32bits(uint32(v1), uint32(v2)))
1088 case unsignedTypeF64:
1089 v := math.Float64frombits(v1) + math.Float64frombits(v2)
1090 ce.pushValue(math.Float64bits(v))
1091 }
1092 frame.pc++
1093 case operationKindSub:
1094 v2 := ce.popValue()
1095 v1 := ce.popValue()
1096 switch unsignedType(op.B1) {
1097 case unsignedTypeI32:
1098 ce.pushValue(uint64(uint32(v1) - uint32(v2)))
1099 case unsignedTypeI64:
1100 ce.pushValue(v1 - v2)
1101 case unsignedTypeF32:
1102 ce.pushValue(subFloat32bits(uint32(v1), uint32(v2)))
1103 case unsignedTypeF64:
1104 v := math.Float64frombits(v1) - math.Float64frombits(v2)
1105 ce.pushValue(math.Float64bits(v))
1106 }
1107 frame.pc++
1108 case operationKindMul:
1109 v2 := ce.popValue()
1110 v1 := ce.popValue()
1111 switch unsignedType(op.B1) {
1112 case unsignedTypeI32:
1113 ce.pushValue(uint64(uint32(v1) * uint32(v2)))
1114 case unsignedTypeI64:
1115 ce.pushValue(v1 * v2)
1116 case unsignedTypeF32:
1117 ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2)))
1118 case unsignedTypeF64:
1119 v := math.Float64frombits(v2) * math.Float64frombits(v1)
1120 ce.pushValue(math.Float64bits(v))
1121 }
1122 frame.pc++
1123 case operationKindClz:
1124 v := ce.popValue()
1125 if op.B1 == 0 {
1126 // unsignedInt32
1127 ce.pushValue(uint64(bits.LeadingZeros32(uint32(v))))
1128 } else {
1129 // unsignedInt64
1130 ce.pushValue(uint64(bits.LeadingZeros64(v)))
1131 }
1132 frame.pc++
1133 case operationKindCtz:
1134 v := ce.popValue()
1135 if op.B1 == 0 {
1136 // unsignedInt32
1137 ce.pushValue(uint64(bits.TrailingZeros32(uint32(v))))
1138 } else {
1139 // unsignedInt64
1140 ce.pushValue(uint64(bits.TrailingZeros64(v)))
1141 }
1142 frame.pc++
1143 case operationKindPopcnt:
1144 v := ce.popValue()
1145 if op.B1 == 0 {
1146 // unsignedInt32
1147 ce.pushValue(uint64(bits.OnesCount32(uint32(v))))
1148 } else {
1149 // unsignedInt64
1150 ce.pushValue(uint64(bits.OnesCount64(v)))
1151 }
1152 frame.pc++
1153 case operationKindDiv:
1154 // If an integer, check we won't divide by zero.
1155 t := signedType(op.B1)
1156 v2, v1 := ce.popValue(), ce.popValue()
1157 switch t {
1158 case signedTypeFloat32, signedTypeFloat64: // not integers
1159 default:
1160 if v2 == 0 {
1161 panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
1162 }
1163 }
1164
1165 switch t {
1166 case signedTypeInt32:
1167 d := int32(v2)
1168 n := int32(v1)
1169 if n == math.MinInt32 && d == -1 {
1170 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1171 }
1172 ce.pushValue(uint64(uint32(n / d)))
1173 case signedTypeInt64:
1174 d := int64(v2)
1175 n := int64(v1)
1176 if n == math.MinInt64 && d == -1 {
1177 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1178 }
1179 ce.pushValue(uint64(n / d))
1180 case signedTypeUint32:
1181 d := uint32(v2)
1182 n := uint32(v1)
1183 ce.pushValue(uint64(n / d))
1184 case signedTypeUint64:
1185 d := v2
1186 n := v1
1187 ce.pushValue(n / d)
1188 case signedTypeFloat32:
1189 ce.pushValue(divFloat32bits(uint32(v1), uint32(v2)))
1190 case signedTypeFloat64:
1191 ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2)))
1192 }
1193 frame.pc++
1194 case operationKindRem:
1195 v2, v1 := ce.popValue(), ce.popValue()
1196 if v2 == 0 {
1197 panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
1198 }
1199 switch signedInt(op.B1) {
1200 case signedInt32:
1201 d := int32(v2)
1202 n := int32(v1)
1203 ce.pushValue(uint64(uint32(n % d)))
1204 case signedInt64:
1205 d := int64(v2)
1206 n := int64(v1)
1207 ce.pushValue(uint64(n % d))
1208 case signedUint32:
1209 d := uint32(v2)
1210 n := uint32(v1)
1211 ce.pushValue(uint64(n % d))
1212 case signedUint64:
1213 d := v2
1214 n := v1
1215 ce.pushValue(n % d)
1216 }
1217 frame.pc++
1218 case operationKindAnd:
1219 v2 := ce.popValue()
1220 v1 := ce.popValue()
1221 if op.B1 == 0 {
1222 // unsignedInt32
1223 ce.pushValue(uint64(uint32(v2) & uint32(v1)))
1224 } else {
1225 // unsignedInt64
1226 ce.pushValue(uint64(v2 & v1))
1227 }
1228 frame.pc++
1229 case operationKindOr:
1230 v2 := ce.popValue()
1231 v1 := ce.popValue()
1232 if op.B1 == 0 {
1233 // unsignedInt32
1234 ce.pushValue(uint64(uint32(v2) | uint32(v1)))
1235 } else {
1236 // unsignedInt64
1237 ce.pushValue(uint64(v2 | v1))
1238 }
1239 frame.pc++
1240 case operationKindXor:
1241 v2 := ce.popValue()
1242 v1 := ce.popValue()
1243 if op.B1 == 0 {
1244 // unsignedInt32
1245 ce.pushValue(uint64(uint32(v2) ^ uint32(v1)))
1246 } else {
1247 // unsignedInt64
1248 ce.pushValue(uint64(v2 ^ v1))
1249 }
1250 frame.pc++
1251 case operationKindShl:
1252 v2 := ce.popValue()
1253 v1 := ce.popValue()
1254 if op.B1 == 0 {
1255 // unsignedInt32
1256 ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32)))
1257 } else {
1258 // unsignedInt64
1259 ce.pushValue(v1 << (v2 % 64))
1260 }
1261 frame.pc++
1262 case operationKindShr:
1263 v2 := ce.popValue()
1264 v1 := ce.popValue()
1265 switch signedInt(op.B1) {
1266 case signedInt32:
1267 ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32))))
1268 case signedInt64:
1269 ce.pushValue(uint64(int64(v1) >> (v2 % 64)))
1270 case signedUint32:
1271 ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32)))
1272 case signedUint64:
1273 ce.pushValue(v1 >> (v2 % 64))
1274 }
1275 frame.pc++
1276 case operationKindRotl:
1277 v2 := ce.popValue()
1278 v1 := ce.popValue()
1279 if op.B1 == 0 {
1280 // unsignedInt32
1281 ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2))))
1282 } else {
1283 // unsignedInt64
1284 ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2))))
1285 }
1286 frame.pc++
1287 case operationKindRotr:
1288 v2 := ce.popValue()
1289 v1 := ce.popValue()
1290 if op.B1 == 0 {
1291 // unsignedInt32
1292 ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2))))
1293 } else {
1294 // unsignedInt64
1295 ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2))))
1296 }
1297 frame.pc++
1298 case operationKindAbs:
1299 if op.B1 == 0 {
1300 // float32
1301 const mask uint32 = 1 << 31
1302 ce.pushValue(uint64(uint32(ce.popValue()) &^ mask))
1303 } else {
1304 // float64
1305 const mask uint64 = 1 << 63
1306 ce.pushValue(ce.popValue() &^ mask)
1307 }
1308 frame.pc++
1309 case operationKindNeg:
1310 if op.B1 == 0 {
1311 // float32
1312 v := -math.Float32frombits(uint32(ce.popValue()))
1313 ce.pushValue(uint64(math.Float32bits(v)))
1314 } else {
1315 // float64
1316 v := -math.Float64frombits(ce.popValue())
1317 ce.pushValue(math.Float64bits(v))
1318 }
1319 frame.pc++
1320 case operationKindCeil:
1321 if op.B1 == 0 {
1322 // float32
1323 v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue())))
1324 ce.pushValue(uint64(math.Float32bits(v)))
1325 } else {
1326 // float64
1327 v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue()))
1328 ce.pushValue(math.Float64bits(v))
1329 }
1330 frame.pc++
1331 case operationKindFloor:
1332 if op.B1 == 0 {
1333 // float32
1334 v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue())))
1335 ce.pushValue(uint64(math.Float32bits(v)))
1336 } else {
1337 // float64
1338 v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue()))
1339 ce.pushValue(math.Float64bits(v))
1340 }
1341 frame.pc++
1342 case operationKindTrunc:
1343 if op.B1 == 0 {
1344 // float32
1345 v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue())))
1346 ce.pushValue(uint64(math.Float32bits(v)))
1347 } else {
1348 // float64
1349 v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue()))
1350 ce.pushValue(math.Float64bits(v))
1351 }
1352 frame.pc++
1353 case operationKindNearest:
1354 if op.B1 == 0 {
1355 // float32
1356 f := math.Float32frombits(uint32(ce.popValue()))
1357 ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f))))
1358 } else {
1359 // float64
1360 f := math.Float64frombits(ce.popValue())
1361 ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f)))
1362 }
1363 frame.pc++
1364 case operationKindSqrt:
1365 if op.B1 == 0 {
1366 // float32
1367 v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue()))))
1368 ce.pushValue(uint64(math.Float32bits(float32(v))))
1369 } else {
1370 // float64
1371 v := math.Sqrt(math.Float64frombits(ce.popValue()))
1372 ce.pushValue(math.Float64bits(v))
1373 }
1374 frame.pc++
1375 case operationKindMin:
1376 if op.B1 == 0 {
1377 // float32
1378 ce.pushValue(wasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue())))
1379 } else {
1380 v2 := math.Float64frombits(ce.popValue())
1381 v1 := math.Float64frombits(ce.popValue())
1382 ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2)))
1383 }
1384 frame.pc++
1385 case operationKindMax:
1386 if op.B1 == 0 {
1387 ce.pushValue(wasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue())))
1388 } else {
1389 // float64
1390 v2 := math.Float64frombits(ce.popValue())
1391 v1 := math.Float64frombits(ce.popValue())
1392 ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2)))
1393 }
1394 frame.pc++
1395 case operationKindCopysign:
1396 if op.B1 == 0 {
1397 // float32
1398 v2 := uint32(ce.popValue())
1399 v1 := uint32(ce.popValue())
1400 const signbit = 1 << 31
1401 ce.pushValue(uint64(v1&^signbit | v2&signbit))
1402 } else {
1403 // float64
1404 v2 := ce.popValue()
1405 v1 := ce.popValue()
1406 const signbit = 1 << 63
1407 ce.pushValue(v1&^signbit | v2&signbit)
1408 }
1409 frame.pc++
1410 case operationKindI32WrapFromI64:
1411 ce.pushValue(uint64(uint32(ce.popValue())))
1412 frame.pc++
1413 case operationKindITruncFromF:
1414 if op.B1 == 0 {
1415 // float32
1416 switch signedInt(op.B2) {
1417 case signedInt32:
1418 v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1420 if op.B3 {
1421 // non-trapping conversion must cast nan to zero.
1422 v = 0
1423 } else {
1424 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1425 }
1426 } else if v < math.MinInt32 || v > math.MaxInt32 {
1427 if op.B3 {
1428 // non-trapping conversion must "saturate" the value for overflowing sources.
1429 if v < 0 {
1430 v = math.MinInt32
1431 } else {
1432 v = math.MaxInt32
1433 }
1434 } else {
1435 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1436 }
1437 }
1438 ce.pushValue(uint64(uint32(int32(v))))
1439 case signedInt64:
1440 v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
1441 res := int64(v)
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1443 if op.B3 {
1444 // non-trapping conversion must cast nan to zero.
1445 res = 0
1446 } else {
1447 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1448 }
1449 } else if v < math.MinInt64 || v >= math.MaxInt64 {
1450 // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
1451 // and that's why we use '>=' not '>' to check overflow.
1452 if op.B3 {
1453 // non-trapping conversion must "saturate" the value for overflowing sources.
1454 if v < 0 {
1455 res = math.MinInt64
1456 } else {
1457 res = math.MaxInt64
1458 }
1459 } else {
1460 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1461 }
1462 }
1463 ce.pushValue(uint64(res))
1464 case signedUint32:
1465 v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1467 if op.B3 {
1468 // non-trapping conversion must cast nan to zero.
1469 v = 0
1470 } else {
1471 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1472 }
1473 } else if v < 0 || v > math.MaxUint32 {
1474 if op.B3 {
1475 // non-trapping conversion must "saturate" the value for overflowing source.
1476 if v < 0 {
1477 v = 0
1478 } else {
1479 v = math.MaxUint32
1480 }
1481 } else {
1482 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1483 }
1484 }
1485 ce.pushValue(uint64(uint32(v)))
1486 case signedUint64:
1487 v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue()))))
1488 res := uint64(v)
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1490 if op.B3 {
1491 // non-trapping conversion must cast nan to zero.
1492 res = 0
1493 } else {
1494 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1495 }
1496 } else if v < 0 || v >= math.MaxUint64 {
1497 // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
1498 // and that's why we use '>=' not '>' to check overflow.
1499 if op.B3 {
1500 // non-trapping conversion must "saturate" the value for overflowing source.
1501 if v < 0 {
1502 res = 0
1503 } else {
1504 res = math.MaxUint64
1505 }
1506 } else {
1507 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1508 }
1509 }
1510 ce.pushValue(res)
1511 }
1512 } else {
1513 // float64
1514 switch signedInt(op.B2) {
1515 case signedInt32:
1516 v := math.Trunc(math.Float64frombits(ce.popValue()))
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1518 if op.B3 {
1519 // non-trapping conversion must cast nan to zero.
1520 v = 0
1521 } else {
1522 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1523 }
1524 } else if v < math.MinInt32 || v > math.MaxInt32 {
1525 if op.B3 {
1526 // non-trapping conversion must "saturate" the value for overflowing source.
1527 if v < 0 {
1528 v = math.MinInt32
1529 } else {
1530 v = math.MaxInt32
1531 }
1532 } else {
1533 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1534 }
1535 }
1536 ce.pushValue(uint64(uint32(int32(v))))
1537 case signedInt64:
1538 v := math.Trunc(math.Float64frombits(ce.popValue()))
1539 res := int64(v)
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1541 if op.B3 {
1542 // non-trapping conversion must cast nan to zero.
1543 res = 0
1544 } else {
1545 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1546 }
1547 } else if v < math.MinInt64 || v >= math.MaxInt64 {
1548 // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation,
1549 // and that's why we use '>=' not '>' to check overflow.
1550 if op.B3 {
1551 // non-trapping conversion must "saturate" the value for overflowing source.
1552 if v < 0 {
1553 res = math.MinInt64
1554 } else {
1555 res = math.MaxInt64
1556 }
1557 } else {
1558 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1559 }
1560 }
1561 ce.pushValue(uint64(res))
1562 case signedUint32:
1563 v := math.Trunc(math.Float64frombits(ce.popValue()))
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1565 if op.B3 {
1566 // non-trapping conversion must cast nan to zero.
1567 v = 0
1568 } else {
1569 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1570 }
1571 } else if v < 0 || v > math.MaxUint32 {
1572 if op.B3 {
1573 // non-trapping conversion must "saturate" the value for overflowing source.
1574 if v < 0 {
1575 v = 0
1576 } else {
1577 v = math.MaxUint32
1578 }
1579 } else {
1580 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1581 }
1582 }
1583 ce.pushValue(uint64(uint32(v)))
1584 case signedUint64:
1585 v := math.Trunc(math.Float64frombits(ce.popValue()))
1586 res := uint64(v)
					if math.IsNaN(v) { // NaN cannot be compared with itself, so we have to use IsNaN
1588 if op.B3 {
1589 // non-trapping conversion must cast nan to zero.
1590 res = 0
1591 } else {
1592 panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
1593 }
1594 } else if v < 0 || v >= math.MaxUint64 {
1595 // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation,
1596 // and that's why we use '>=' not '>' to check overflow.
1597 if op.B3 {
1598 // non-trapping conversion must "saturate" the value for overflowing source.
1599 if v < 0 {
1600 res = 0
1601 } else {
1602 res = math.MaxUint64
1603 }
1604 } else {
1605 panic(wasmruntime.ErrRuntimeIntegerOverflow)
1606 }
1607 }
1608 ce.pushValue(res)
1609 }
1610 }
1611 frame.pc++
1612 case operationKindFConvertFromI:
1613 switch signedInt(op.B1) {
1614 case signedInt32:
1615 if op.B2 == 0 {
1616 // float32
1617 v := float32(int32(ce.popValue()))
1618 ce.pushValue(uint64(math.Float32bits(v)))
1619 } else {
1620 // float64
1621 v := float64(int32(ce.popValue()))
1622 ce.pushValue(math.Float64bits(v))
1623 }
1624 case signedInt64:
1625 if op.B2 == 0 {
1626 // float32
1627 v := float32(int64(ce.popValue()))
1628 ce.pushValue(uint64(math.Float32bits(v)))
1629 } else {
1630 // float64
1631 v := float64(int64(ce.popValue()))
1632 ce.pushValue(math.Float64bits(v))
1633 }
1634 case signedUint32:
1635 if op.B2 == 0 {
1636 // float32
1637 v := float32(uint32(ce.popValue()))
1638 ce.pushValue(uint64(math.Float32bits(v)))
1639 } else {
1640 // float64
1641 v := float64(uint32(ce.popValue()))
1642 ce.pushValue(math.Float64bits(v))
1643 }
1644 case signedUint64:
1645 if op.B2 == 0 {
1646 // float32
1647 v := float32(ce.popValue())
1648 ce.pushValue(uint64(math.Float32bits(v)))
1649 } else {
1650 // float64
1651 v := float64(ce.popValue())
1652 ce.pushValue(math.Float64bits(v))
1653 }
1654 }
1655 frame.pc++
1656 case operationKindF32DemoteFromF64:
1657 v := float32(math.Float64frombits(ce.popValue()))
1658 ce.pushValue(uint64(math.Float32bits(v)))
1659 frame.pc++
1660 case operationKindF64PromoteFromF32:
1661 v := float64(math.Float32frombits(uint32(ce.popValue())))
1662 ce.pushValue(math.Float64bits(v))
1663 frame.pc++
1664 case operationKindExtend:
1665 if op.B1 == 1 {
1666 // Signed.
1667 v := int64(int32(ce.popValue()))
1668 ce.pushValue(uint64(v))
1669 } else {
1670 v := uint64(uint32(ce.popValue()))
1671 ce.pushValue(v)
1672 }
1673 frame.pc++
1674 case operationKindSignExtend32From8:
1675 v := uint32(int8(ce.popValue()))
1676 ce.pushValue(uint64(v))
1677 frame.pc++
1678 case operationKindSignExtend32From16:
1679 v := uint32(int16(ce.popValue()))
1680 ce.pushValue(uint64(v))
1681 frame.pc++
1682 case operationKindSignExtend64From8:
1683 v := int64(int8(ce.popValue()))
1684 ce.pushValue(uint64(v))
1685 frame.pc++
1686 case operationKindSignExtend64From16:
1687 v := int64(int16(ce.popValue()))
1688 ce.pushValue(uint64(v))
1689 frame.pc++
1690 case operationKindSignExtend64From32:
1691 v := int64(int32(ce.popValue()))
1692 ce.pushValue(uint64(v))
1693 frame.pc++
1694 case operationKindMemoryInit:
1695 dataInstance := dataInstances[op.U1]
1696 copySize := ce.popValue()
1697 inDataOffset := ce.popValue()
1698 inMemoryOffset := ce.popValue()
1699 if inDataOffset+copySize > uint64(len(dataInstance)) ||
1700 inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) {
1701 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1702 } else if copySize != 0 {
1703 copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:])
1704 }
1705 frame.pc++
1706 case operationKindDataDrop:
1707 dataInstances[op.U1] = nil
1708 frame.pc++
1709 case operationKindMemoryCopy:
1710 memLen := uint64(len(memoryInst.Buffer))
1711 copySize := ce.popValue()
1712 sourceOffset := ce.popValue()
1713 destinationOffset := ce.popValue()
1714 if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen {
1715 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1716 } else if copySize != 0 {
1717 copy(memoryInst.Buffer[destinationOffset:],
1718 memoryInst.Buffer[sourceOffset:sourceOffset+copySize])
1719 }
1720 frame.pc++
1721 case operationKindMemoryFill:
1722 fillSize := ce.popValue()
1723 value := byte(ce.popValue())
1724 offset := ce.popValue()
1725 if fillSize+offset > uint64(len(memoryInst.Buffer)) {
1726 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1727 } else if fillSize != 0 {
				// Uses the copy trick for faster buffer filling.
1729 // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
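				// Each pass doubles the length of the initialized prefix, so the whole
				// region is filled in O(log n) copy calls.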
1730 buf := memoryInst.Buffer[offset : offset+fillSize]
1731 buf[0] = value
1732 for i := 1; i < len(buf); i *= 2 {
1733 copy(buf[i:], buf[:i])
1734 }
1735 }
1736 frame.pc++
1737 case operationKindTableInit:
1738 elementInstance := elementInstances[op.U1]
1739 copySize := ce.popValue()
1740 inElementOffset := ce.popValue()
1741 inTableOffset := ce.popValue()
1742 table := tables[op.U2]
1743 if inElementOffset+copySize > uint64(len(elementInstance)) ||
1744 inTableOffset+copySize > uint64(len(table.References)) {
1745 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
1746 } else if copySize != 0 {
1747 copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:])
1748 }
1749 frame.pc++
1750 case operationKindElemDrop:
1751 elementInstances[op.U1] = nil
1752 frame.pc++
1753 case operationKindTableCopy:
1754 srcTable, dstTable := tables[op.U1].References, tables[op.U2].References
1755 copySize := ce.popValue()
1756 sourceOffset := ce.popValue()
1757 destinationOffset := ce.popValue()
1758 if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) {
1759 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
1760 } else if copySize != 0 {
1761 copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize])
1762 }
1763 frame.pc++
1764 case operationKindRefFunc:
1765 ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1]))))
1766 frame.pc++
1767 case operationKindTableGet:
1768 table := tables[op.U1]
1769
1770 offset := ce.popValue()
1771 if offset >= uint64(len(table.References)) {
1772 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
1773 }
1774
1775 ce.pushValue(uint64(table.References[offset]))
1776 frame.pc++
1777 case operationKindTableSet:
1778 table := tables[op.U1]
1779 ref := ce.popValue()
1780
1781 offset := ce.popValue()
1782 if offset >= uint64(len(table.References)) {
1783 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
1784 }
1785
1786 table.References[offset] = uintptr(ref) // externrefs are opaque uint64.
1787 frame.pc++
1788 case operationKindTableSize:
1789 table := tables[op.U1]
1790 ce.pushValue(uint64(len(table.References)))
1791 frame.pc++
1792 case operationKindTableGrow:
1793 table := tables[op.U1]
1794 num, ref := ce.popValue(), ce.popValue()
1795 ret := table.Grow(uint32(num), uintptr(ref))
1796 ce.pushValue(uint64(ret))
1797 frame.pc++
1798 case operationKindTableFill:
1799 table := tables[op.U1]
1800 num := ce.popValue()
1801 ref := uintptr(ce.popValue())
1802 offset := ce.popValue()
1803 if num+offset > uint64(len(table.References)) {
1804 panic(wasmruntime.ErrRuntimeInvalidTableAccess)
1805 } else if num > 0 {
				// Uses the copy trick to fill the region with the value faster.
1807 // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
1808 targetRegion := table.References[offset : offset+num]
1809 targetRegion[0] = ref
1810 for i := 1; i < len(targetRegion); i *= 2 {
1811 copy(targetRegion[i:], targetRegion[:i])
1812 }
1813 }
1814 frame.pc++
1815 case operationKindV128Const:
1816 lo, hi := op.U1, op.U2
1817 ce.pushValue(lo)
1818 ce.pushValue(hi)
1819 frame.pc++
1820 case operationKindV128Add:
1821 yHigh, yLow := ce.popValue(), ce.popValue()
1822 xHigh, xLow := ce.popValue(), ce.popValue()
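			// A v128 value occupies two stack slots: the low 64 bits are pushed first and the
			// high 64 bits sit on top, so the high half of each operand is popped first.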
1823 switch op.B1 {
1824 case shapeI8x16:
1825 ce.pushValue(
1826 uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) |
1827 uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 |
1828 uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 |
1829 uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48,
1830 )
1831 ce.pushValue(
1832 uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) |
1833 uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 |
1834 uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 |
1835 uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48,
1836 )
1837 case shapeI16x8:
				ce.pushValue(
					uint64(uint16(xLow>>16)+uint16(yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) |
						uint64(uint16(xLow>>48)+uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)+uint16(yLow>>32))<<32,
				)
1842 ce.pushValue(
1843 uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) |
1844 uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32,
1845 )
1846 case shapeI32x4:
1847 ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow)))
1848 ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh)))
1849 case shapeI64x2:
1850 ce.pushValue(xLow + yLow)
1851 ce.pushValue(xHigh + yHigh)
1852 case shapeF32x4:
1853 ce.pushValue(
1854 addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
1855 )
1856 ce.pushValue(
1857 addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
1858 )
1859 case shapeF64x2:
1860 ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow)))
1861 ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh)))
1862 }
1863 frame.pc++
1864 case operationKindV128Sub:
1865 yHigh, yLow := ce.popValue(), ce.popValue()
1866 xHigh, xLow := ce.popValue(), ce.popValue()
1867 switch op.B1 {
1868 case shapeI8x16:
1869 ce.pushValue(
1870 uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) |
1871 uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 |
1872 uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 |
1873 uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48,
1874 )
1875 ce.pushValue(
1876 uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) |
1877 uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 |
1878 uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 |
1879 uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48,
1880 )
1881 case shapeI16x8:
1882 ce.pushValue(
1883 uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) |
1884 uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32,
1885 )
1886 ce.pushValue(
1887 uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) |
1888 uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32,
1889 )
1890 case shapeI32x4:
				ce.pushValue(uint64(uint32(xLow>>32)-uint32(yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow)))
				ce.pushValue(uint64(uint32(xHigh>>32)-uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh)))
1893 case shapeI64x2:
1894 ce.pushValue(xLow - yLow)
1895 ce.pushValue(xHigh - yHigh)
1896 case shapeF32x4:
1897 ce.pushValue(
1898 subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32,
1899 )
1900 ce.pushValue(
1901 subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32,
1902 )
1903 case shapeF64x2:
1904 ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow)))
1905 ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh)))
1906 }
1907 frame.pc++
1908 case operationKindV128Load:
1909 offset := ce.popMemoryOffset(op)
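			// op.B1 selects the load variant: the full 128-bit load, the widening 8x8/16x4/32x2 loads
			// (signed or unsigned), the 8/16/32/64-bit splats, or the 32/64-bit zero-padded loads.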
1910 switch op.B1 {
1911 case v128LoadType128:
1912 lo, ok := memoryInst.ReadUint64Le(offset)
1913 if !ok {
1914 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1915 }
1916 ce.pushValue(lo)
1917 hi, ok := memoryInst.ReadUint64Le(offset + 8)
1918 if !ok {
1919 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1920 }
1921 ce.pushValue(hi)
1922 case v128LoadType8x8s:
1923 data, ok := memoryInst.Read(offset, 8)
1924 if !ok {
1925 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1926 }
1927 ce.pushValue(
1928 uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))),
1929 )
1930 ce.pushValue(
1931 uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))),
1932 )
1933 case v128LoadType8x8u:
1934 data, ok := memoryInst.Read(offset, 8)
1935 if !ok {
1936 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1937 }
1938 ce.pushValue(
1939 uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]),
1940 )
1941 ce.pushValue(
1942 uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]),
1943 )
1944 case v128LoadType16x4s:
1945 data, ok := memoryInst.Read(offset, 8)
1946 if !ok {
1947 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1948 }
1949 ce.pushValue(
1950 uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 |
1951 uint64(uint32(int16(binary.LittleEndian.Uint16(data)))),
1952 )
1953 ce.pushValue(
1954 uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 |
1955 uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))),
1956 )
1957 case v128LoadType16x4u:
1958 data, ok := memoryInst.Read(offset, 8)
1959 if !ok {
1960 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1961 }
1962 ce.pushValue(
1963 uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)),
1964 )
1965 ce.pushValue(
1966 uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])),
1967 )
1968 case v128LoadType32x2s:
1969 data, ok := memoryInst.Read(offset, 8)
1970 if !ok {
1971 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1972 }
1973 ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data))))
1974 ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:]))))
1975 case v128LoadType32x2u:
1976 data, ok := memoryInst.Read(offset, 8)
1977 if !ok {
1978 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1979 }
1980 ce.pushValue(uint64(binary.LittleEndian.Uint32(data)))
1981 ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:])))
1982 case v128LoadType8Splat:
1983 v, ok := memoryInst.ReadByte(offset)
1984 if !ok {
1985 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1986 }
1987 v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 |
1988 uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v)
1989 ce.pushValue(v8)
1990 ce.pushValue(v8)
1991 case v128LoadType16Splat:
1992 v, ok := memoryInst.ReadUint16Le(offset)
1993 if !ok {
1994 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
1995 }
1996 v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v)
1997 ce.pushValue(v4)
1998 ce.pushValue(v4)
1999 case v128LoadType32Splat:
2000 v, ok := memoryInst.ReadUint32Le(offset)
2001 if !ok {
2002 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2003 }
2004 vv := uint64(v)<<32 | uint64(v)
2005 ce.pushValue(vv)
2006 ce.pushValue(vv)
2007 case v128LoadType64Splat:
2008 lo, ok := memoryInst.ReadUint64Le(offset)
2009 if !ok {
2010 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2011 }
2012 ce.pushValue(lo)
2013 ce.pushValue(lo)
2014 case v128LoadType32zero:
2015 lo, ok := memoryInst.ReadUint32Le(offset)
2016 if !ok {
2017 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2018 }
2019 ce.pushValue(uint64(lo))
2020 ce.pushValue(0)
2021 case v128LoadType64zero:
2022 lo, ok := memoryInst.ReadUint64Le(offset)
2023 if !ok {
2024 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2025 }
2026 ce.pushValue(lo)
2027 ce.pushValue(0)
2028 }
2029 frame.pc++
2030 case operationKindV128LoadLane:
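			// op.B1 is the lane width in bits and op.B2 the lane index; the loaded scalar replaces
			// that single lane of the v128 operand popped from the stack.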
2031 hi, lo := ce.popValue(), ce.popValue()
2032 offset := ce.popMemoryOffset(op)
2033 switch op.B1 {
2034 case 8:
2035 b, ok := memoryInst.ReadByte(offset)
2036 if !ok {
2037 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2038 }
2039 if op.B2 < 8 {
2040 s := op.B2 << 3
2041 lo = (lo & ^(0xff << s)) | uint64(b)<<s
2042 } else {
2043 s := (op.B2 - 8) << 3
2044 hi = (hi & ^(0xff << s)) | uint64(b)<<s
2045 }
2046 case 16:
2047 b, ok := memoryInst.ReadUint16Le(offset)
2048 if !ok {
2049 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2050 }
2051 if op.B2 < 4 {
2052 s := op.B2 << 4
2053 lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s
2054 } else {
2055 s := (op.B2 - 4) << 4
2056 hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s
2057 }
2058 case 32:
2059 b, ok := memoryInst.ReadUint32Le(offset)
2060 if !ok {
2061 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2062 }
2063 if op.B2 < 2 {
2064 s := op.B2 << 5
2065 lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
2066 } else {
2067 s := (op.B2 - 2) << 5
2068 hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s
2069 }
2070 case 64:
2071 b, ok := memoryInst.ReadUint64Le(offset)
2072 if !ok {
2073 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2074 }
2075 if op.B2 == 0 {
2076 lo = b
2077 } else {
2078 hi = b
2079 }
2080 }
2081 ce.pushValue(lo)
2082 ce.pushValue(hi)
2083 frame.pc++
2084 case operationKindV128Store:
2085 hi, lo := ce.popValue(), ce.popValue()
2086 offset := ce.popMemoryOffset(op)
2087 // Write the upper bytes first to trigger an early error if the memory access is out of bounds.
2088 // Otherwise, the lower bytes might be written to memory, but the upper bytes might not.
2089 if uint64(offset)+8 > math.MaxUint32 {
2090 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2091 }
2092 if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok {
2093 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2094 }
2095 if ok := memoryInst.WriteUint64Le(offset, lo); !ok {
2096 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2097 }
2098 frame.pc++
2099 case operationKindV128StoreLane:
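			// op.B1 is the lane width in bits and op.B2 the lane index; only the selected lane is written to memory.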
2100 hi, lo := ce.popValue(), ce.popValue()
2101 offset := ce.popMemoryOffset(op)
2102 var ok bool
2103 switch op.B1 {
2104 case 8:
2105 if op.B2 < 8 {
2106 ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8)))
2107 } else {
2108 ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8)))
2109 }
2110 case 16:
2111 if op.B2 < 4 {
2112 ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16)))
2113 } else {
2114 ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16)))
2115 }
2116 case 32:
2117 if op.B2 < 2 {
2118 ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32)))
2119 } else {
2120 ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32)))
2121 }
2122 case 64:
2123 if op.B2 == 0 {
2124 ok = memoryInst.WriteUint64Le(offset, lo)
2125 } else {
2126 ok = memoryInst.WriteUint64Le(offset, hi)
2127 }
2128 }
2129 if !ok {
2130 panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
2131 }
2132 frame.pc++
2133 case operationKindV128ReplaceLane:
2134 v := ce.popValue()
2135 hi, lo := ce.popValue(), ce.popValue()
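			// op.B1 gives the lane shape and op.B2 the lane index; that lane of the v128 (hi, lo) is
			// overwritten with the scalar v popped above.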
2136 switch op.B1 {
2137 case shapeI8x16:
2138 if op.B2 < 8 {
2139 s := op.B2 << 3
2140 lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s
2141 } else {
2142 s := (op.B2 - 8) << 3
2143 hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s
2144 }
2145 case shapeI16x8:
2146 if op.B2 < 4 {
2147 s := op.B2 << 4
2148 lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s
2149 } else {
2150 s := (op.B2 - 4) << 4
2151 hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s
2152 }
2153 case shapeI32x4, shapeF32x4:
2154 if op.B2 < 2 {
2155 s := op.B2 << 5
2156 lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
2157 } else {
2158 s := (op.B2 - 2) << 5
2159 hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s
2160 }
2161 case shapeI64x2, shapeF64x2:
2162 if op.B2 == 0 {
2163 lo = v
2164 } else {
2165 hi = v
2166 }
2167 }
2168 ce.pushValue(lo)
2169 ce.pushValue(hi)
2170 frame.pc++
2171 case operationKindV128ExtractLane:
2172 hi, lo := ce.popValue(), ce.popValue()
2173 var v uint64
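			// op.B2 selects the lane; for the i8x16 and i16x8 shapes, op.B3 chooses signed (true) or
			// zero (false) extension of the extracted lane.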
2174 switch op.B1 {
2175 case shapeI8x16:
2176 var u8 byte
2177 if op.B2 < 8 {
2178 u8 = byte(lo >> (op.B2 * 8))
2179 } else {
2180 u8 = byte(hi >> ((op.B2 - 8) * 8))
2181 }
2182 if op.B3 {
2183 // sign-extend.
2184 v = uint64(uint32(int8(u8)))
2185 } else {
2186 v = uint64(u8)
2187 }
2188 case shapeI16x8:
2189 var u16 uint16
2190 if op.B2 < 4 {
2191 u16 = uint16(lo >> (op.B2 * 16))
2192 } else {
2193 u16 = uint16(hi >> ((op.B2 - 4) * 16))
2194 }
2195 if op.B3 {
2196 // sign-extend.
2197 v = uint64(uint32(int16(u16)))
2198 } else {
2199 v = uint64(u16)
2200 }
2201 case shapeI32x4, shapeF32x4:
2202 if op.B2 < 2 {
2203 v = uint64(uint32(lo >> (op.B2 * 32)))
2204 } else {
2205 v = uint64(uint32(hi >> ((op.B2 - 2) * 32)))
2206 }
2207 case shapeI64x2, shapeF64x2:
2208 if op.B2 == 0 {
2209 v = lo
2210 } else {
2211 v = hi
2212 }
2213 }
2214 ce.pushValue(v)
2215 frame.pc++
2216 case operationKindV128Splat:
2217 v := ce.popValue()
2218 var hi, lo uint64
2219 switch op.B1 {
2220 case shapeI8x16:
2221 v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 |
2222 uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v))
2223 hi, lo = v8, v8
2224 case shapeI16x8:
2225 v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v))
2226 hi, lo = v4, v4
2227 case shapeI32x4, shapeF32x4:
2228 v2 := uint64(uint32(v))<<32 | uint64(uint32(v))
2229 lo, hi = v2, v2
2230 case shapeI64x2, shapeF64x2:
2231 lo, hi = v, v
2232 }
2233 ce.pushValue(lo)
2234 ce.pushValue(hi)
2235 frame.pc++
2236 case operationKindV128Swizzle:
2237 idxHi, idxLo := ce.popValue(), ce.popValue()
2238 baseHi, baseLo := ce.popValue(), ce.popValue()
2239 var newVal [16]byte
2240 for i := 0; i < 16; i++ {
2241 var id byte
2242 if i < 8 {
2243 id = byte(idxLo >> (i * 8))
2244 } else {
2245 id = byte(idxHi >> ((i - 8) * 8))
2246 }
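				// Lane indices >= 16 select zero: newVal is zero-initialized, so such lanes are left untouched.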
2247 if id < 8 {
2248 newVal[i] = byte(baseLo >> (id * 8))
2249 } else if id < 16 {
2250 newVal[i] = byte(baseHi >> ((id - 8) * 8))
2251 }
2252 }
2253 ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
2254 ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
2255 frame.pc++
2256 case operationKindV128Shuffle:
2257 xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue()
2258 var newVal [16]byte
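			// op.Us holds the 16 lane selectors: indices 0-15 pick a byte from the y operand (pushed
			// first), and 16-31 pick from the x operand (pushed second).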
2259 for i, l := range op.Us {
2260 if l < 8 {
2261 newVal[i] = byte(yLo >> (l * 8))
2262 } else if l < 16 {
2263 newVal[i] = byte(yHi >> ((l - 8) * 8))
2264 } else if l < 24 {
2265 newVal[i] = byte(xLo >> ((l - 16) * 8))
2266 } else if l < 32 {
2267 newVal[i] = byte(xHi >> ((l - 24) * 8))
2268 }
2269 }
2270 ce.pushValue(binary.LittleEndian.Uint64(newVal[:8]))
2271 ce.pushValue(binary.LittleEndian.Uint64(newVal[8:]))
2272 frame.pc++
2273 case operationKindV128AnyTrue:
2274 hi, lo := ce.popValue(), ce.popValue()
2275 if hi != 0 || lo != 0 {
2276 ce.pushValue(1)
2277 } else {
2278 ce.pushValue(0)
2279 }
2280 frame.pc++
2281 case operationKindV128AllTrue:
2282 hi, lo := ce.popValue(), ce.popValue()
2283 var ret bool
2284 switch op.B1 {
2285 case shapeI8x16:
2286 ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) &&
2287 (uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) &&
2288 (uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) &&
2289 (uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0)
2290 case shapeI16x8:
2291 ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) &&
2292 (uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0)
2293 case shapeI32x4:
2294 ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) &&
2295 (uint32(hi) != 0) && (uint32(hi>>32) != 0)
2296 case shapeI64x2:
2297 ret = (lo != 0) &&
2298 (hi != 0)
2299 }
2300 if ret {
2301 ce.pushValue(1)
2302 } else {
2303 ce.pushValue(0)
2304 }
2305 frame.pc++
2306 case operationKindV128BitMask:
2307 // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction
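			// The sign (most-significant) bit of each lane is packed into the low bits of a single scalar result.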
2308 hi, lo := ce.popValue(), ce.popValue()
2309 var res uint64
2310 switch op.B1 {
2311 case shapeI8x16:
2312 for i := 0; i < 8; i++ {
2313 if int8(lo>>(i*8)) < 0 {
2314 res |= 1 << i
2315 }
2316 }
2317 for i := 0; i < 8; i++ {
2318 if int8(hi>>(i*8)) < 0 {
2319 res |= 1 << (i + 8)
2320 }
2321 }
2322 case shapeI16x8:
2323 for i := 0; i < 4; i++ {
2324 if int16(lo>>(i*16)) < 0 {
2325 res |= 1 << i
2326 }
2327 }
2328 for i := 0; i < 4; i++ {
2329 if int16(hi>>(i*16)) < 0 {
2330 res |= 1 << (i + 4)
2331 }
2332 }
2333 case shapeI32x4:
2334 for i := 0; i < 2; i++ {
2335 if int32(lo>>(i*32)) < 0 {
2336 res |= 1 << i
2337 }
2338 }
2339 for i := 0; i < 2; i++ {
2340 if int32(hi>>(i*32)) < 0 {
2341 res |= 1 << (i + 2)
2342 }
2343 }
2344 case shapeI64x2:
2345 if int64(lo) < 0 {
2346 res |= 0b01
2347 }
				if int64(hi) < 0 {
2349 res |= 0b10
2350 }
2351 }
2352 ce.pushValue(res)
2353 frame.pc++
2354 case operationKindV128And:
2355 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2356 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2357 ce.pushValue(x1Lo & x2Lo)
2358 ce.pushValue(x1Hi & x2Hi)
2359 frame.pc++
2360 case operationKindV128Not:
2361 hi, lo := ce.popValue(), ce.popValue()
2362 ce.pushValue(^lo)
2363 ce.pushValue(^hi)
2364 frame.pc++
2365 case operationKindV128Or:
2366 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2367 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2368 ce.pushValue(x1Lo | x2Lo)
2369 ce.pushValue(x1Hi | x2Hi)
2370 frame.pc++
2371 case operationKindV128Xor:
2372 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2373 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2374 ce.pushValue(x1Lo ^ x2Lo)
2375 ce.pushValue(x1Hi ^ x2Hi)
2376 frame.pc++
2377 case operationKindV128Bitselect:
2378 // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select
2379 cHi, cLo := ce.popValue(), ce.popValue()
2380 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2381 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2382 // v128.or(v128.and(v1, c), v128.and(v2, v128.not(c)))
2383 ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo)))
2384 ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi)))
2385 frame.pc++
2386 case operationKindV128AndNot:
2387 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2388 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2389 ce.pushValue(x1Lo & (^x2Lo))
2390 ce.pushValue(x1Hi & (^x2Hi))
2391 frame.pc++
2392 case operationKindV128Shl:
2393 s := ce.popValue()
2394 hi, lo := ce.popValue(), ce.popValue()
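			// The shift amount is reduced modulo the lane width, per the spec.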
2395 switch op.B1 {
2396 case shapeI8x16:
2397 s = s % 8
2398 lo = uint64(uint8(lo<<s)) |
2399 uint64(uint8((lo>>8)<<s))<<8 |
2400 uint64(uint8((lo>>16)<<s))<<16 |
2401 uint64(uint8((lo>>24)<<s))<<24 |
2402 uint64(uint8((lo>>32)<<s))<<32 |
2403 uint64(uint8((lo>>40)<<s))<<40 |
2404 uint64(uint8((lo>>48)<<s))<<48 |
2405 uint64(uint8((lo>>56)<<s))<<56
2406 hi = uint64(uint8(hi<<s)) |
2407 uint64(uint8((hi>>8)<<s))<<8 |
2408 uint64(uint8((hi>>16)<<s))<<16 |
2409 uint64(uint8((hi>>24)<<s))<<24 |
2410 uint64(uint8((hi>>32)<<s))<<32 |
2411 uint64(uint8((hi>>40)<<s))<<40 |
2412 uint64(uint8((hi>>48)<<s))<<48 |
2413 uint64(uint8((hi>>56)<<s))<<56
2414 case shapeI16x8:
2415 s = s % 16
2416 lo = uint64(uint16(lo<<s)) |
2417 uint64(uint16((lo>>16)<<s))<<16 |
2418 uint64(uint16((lo>>32)<<s))<<32 |
2419 uint64(uint16((lo>>48)<<s))<<48
2420 hi = uint64(uint16(hi<<s)) |
2421 uint64(uint16((hi>>16)<<s))<<16 |
2422 uint64(uint16((hi>>32)<<s))<<32 |
2423 uint64(uint16((hi>>48)<<s))<<48
2424 case shapeI32x4:
2425 s = s % 32
2426 lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32
2427 hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32
2428 case shapeI64x2:
2429 s = s % 64
2430 lo = lo << s
2431 hi = hi << s
2432 }
2433 ce.pushValue(lo)
2434 ce.pushValue(hi)
2435 frame.pc++
2436 case operationKindV128Shr:
2437 s := ce.popValue()
2438 hi, lo := ce.popValue(), ce.popValue()
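			// The shift amount is reduced modulo the lane width; op.B3 selects arithmetic (signed)
			// versus logical (unsigned) shifting.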
2439 switch op.B1 {
2440 case shapeI8x16:
2441 s = s % 8
2442 if op.B3 { // signed
2443 lo = uint64(uint8(int8(lo)>>s)) |
2444 uint64(uint8(int8(lo>>8)>>s))<<8 |
2445 uint64(uint8(int8(lo>>16)>>s))<<16 |
2446 uint64(uint8(int8(lo>>24)>>s))<<24 |
2447 uint64(uint8(int8(lo>>32)>>s))<<32 |
2448 uint64(uint8(int8(lo>>40)>>s))<<40 |
2449 uint64(uint8(int8(lo>>48)>>s))<<48 |
2450 uint64(uint8(int8(lo>>56)>>s))<<56
2451 hi = uint64(uint8(int8(hi)>>s)) |
2452 uint64(uint8(int8(hi>>8)>>s))<<8 |
2453 uint64(uint8(int8(hi>>16)>>s))<<16 |
2454 uint64(uint8(int8(hi>>24)>>s))<<24 |
2455 uint64(uint8(int8(hi>>32)>>s))<<32 |
2456 uint64(uint8(int8(hi>>40)>>s))<<40 |
2457 uint64(uint8(int8(hi>>48)>>s))<<48 |
2458 uint64(uint8(int8(hi>>56)>>s))<<56
2459 } else {
2460 lo = uint64(uint8(lo)>>s) |
2461 uint64(uint8(lo>>8)>>s)<<8 |
2462 uint64(uint8(lo>>16)>>s)<<16 |
2463 uint64(uint8(lo>>24)>>s)<<24 |
2464 uint64(uint8(lo>>32)>>s)<<32 |
2465 uint64(uint8(lo>>40)>>s)<<40 |
2466 uint64(uint8(lo>>48)>>s)<<48 |
2467 uint64(uint8(lo>>56)>>s)<<56
2468 hi = uint64(uint8(hi)>>s) |
2469 uint64(uint8(hi>>8)>>s)<<8 |
2470 uint64(uint8(hi>>16)>>s)<<16 |
2471 uint64(uint8(hi>>24)>>s)<<24 |
2472 uint64(uint8(hi>>32)>>s)<<32 |
2473 uint64(uint8(hi>>40)>>s)<<40 |
2474 uint64(uint8(hi>>48)>>s)<<48 |
2475 uint64(uint8(hi>>56)>>s)<<56
2476 }
2477 case shapeI16x8:
2478 s = s % 16
2479 if op.B3 { // signed
2480 lo = uint64(uint16(int16(lo)>>s)) |
2481 uint64(uint16(int16(lo>>16)>>s))<<16 |
2482 uint64(uint16(int16(lo>>32)>>s))<<32 |
2483 uint64(uint16(int16(lo>>48)>>s))<<48
2484 hi = uint64(uint16(int16(hi)>>s)) |
2485 uint64(uint16(int16(hi>>16)>>s))<<16 |
2486 uint64(uint16(int16(hi>>32)>>s))<<32 |
2487 uint64(uint16(int16(hi>>48)>>s))<<48
2488 } else {
2489 lo = uint64(uint16(lo)>>s) |
2490 uint64(uint16(lo>>16)>>s)<<16 |
2491 uint64(uint16(lo>>32)>>s)<<32 |
2492 uint64(uint16(lo>>48)>>s)<<48
2493 hi = uint64(uint16(hi)>>s) |
2494 uint64(uint16(hi>>16)>>s)<<16 |
2495 uint64(uint16(hi>>32)>>s)<<32 |
2496 uint64(uint16(hi>>48)>>s)<<48
2497 }
2498 case shapeI32x4:
2499 s = s % 32
2500 if op.B3 {
2501 lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32
2502 hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32
2503 } else {
2504 lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32
2505 hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32
2506 }
2507 case shapeI64x2:
2508 s = s % 64
2509 if op.B3 { // signed
2510 lo = uint64(int64(lo) >> s)
2511 hi = uint64(int64(hi) >> s)
2512 } else {
2513 lo = lo >> s
2514 hi = hi >> s
2515 }
2516
2517 }
2518 ce.pushValue(lo)
2519 ce.pushValue(hi)
2520 frame.pc++
2521 case operationKindV128Cmp:
2522 x2Hi, x2Lo := ce.popValue(), ce.popValue()
2523 x1Hi, x1Lo := ce.popValue(), ce.popValue()
2524 var result []bool
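			// Each comparison is evaluated per lane into result; the booleans are materialized below
			// as all-ones (true) or all-zeros (false) lanes.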
2525 switch op.B1 {
2526 case v128CmpTypeI8x16Eq:
2527 result = []bool{
2528 byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8),
2529 byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24),
2530 byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40),
2531 byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56),
2532 byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8),
2533 byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24),
2534 byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40),
2535 byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56),
2536 }
2537 case v128CmpTypeI8x16Ne:
2538 result = []bool{
2539 byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8),
2540 byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24),
2541 byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40),
2542 byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56),
2543 byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8),
2544 byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24),
2545 byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40),
2546 byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56),
2547 }
2548 case v128CmpTypeI8x16LtS:
2549 result = []bool{
2550 int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8),
2551 int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24),
2552 int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40),
2553 int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56),
2554 int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8),
2555 int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24),
2556 int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40),
2557 int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56),
2558 }
2559 case v128CmpTypeI8x16LtU:
2560 result = []bool{
2561 byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8),
2562 byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24),
2563 byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40),
2564 byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56),
2565 byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8),
2566 byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24),
2567 byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40),
2568 byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56),
2569 }
2570 case v128CmpTypeI8x16GtS:
2571 result = []bool{
2572 int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8),
2573 int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24),
2574 int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40),
2575 int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56),
2576 int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8),
2577 int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24),
2578 int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40),
2579 int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56),
2580 }
2581 case v128CmpTypeI8x16GtU:
2582 result = []bool{
2583 byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8),
2584 byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24),
2585 byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40),
2586 byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56),
2587 byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8),
2588 byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24),
2589 byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40),
2590 byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56),
2591 }
2592 case v128CmpTypeI8x16LeS:
2593 result = []bool{
2594 int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8),
2595 int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24),
2596 int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40),
2597 int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56),
2598 int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8),
2599 int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24),
2600 int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40),
2601 int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56),
2602 }
2603 case v128CmpTypeI8x16LeU:
2604 result = []bool{
2605 byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8),
2606 byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24),
2607 byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40),
2608 byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56),
2609 byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8),
2610 byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24),
2611 byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40),
2612 byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56),
2613 }
2614 case v128CmpTypeI8x16GeS:
2615 result = []bool{
2616 int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8),
2617 int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24),
2618 int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40),
2619 int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56),
2620 int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8),
2621 int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24),
2622 int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40),
2623 int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56),
2624 }
2625 case v128CmpTypeI8x16GeU:
2626 result = []bool{
2627 byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8),
2628 byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24),
2629 byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40),
2630 byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56),
2631 byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8),
2632 byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24),
2633 byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40),
2634 byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56),
2635 }
2636 case v128CmpTypeI16x8Eq:
2637 result = []bool{
2638 uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16),
2639 uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48),
2640 uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16),
2641 uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48),
2642 }
2643 case v128CmpTypeI16x8Ne:
2644 result = []bool{
2645 uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16),
2646 uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48),
2647 uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16),
2648 uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48),
2649 }
2650 case v128CmpTypeI16x8LtS:
2651 result = []bool{
2652 int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16),
2653 int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48),
2654 int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16),
2655 int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48),
2656 }
2657 case v128CmpTypeI16x8LtU:
2658 result = []bool{
2659 uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16),
2660 uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48),
2661 uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16),
2662 uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48),
2663 }
2664 case v128CmpTypeI16x8GtS:
2665 result = []bool{
2666 int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16),
2667 int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48),
2668 int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16),
2669 int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48),
2670 }
2671 case v128CmpTypeI16x8GtU:
2672 result = []bool{
2673 uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16),
2674 uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48),
2675 uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16),
2676 uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48),
2677 }
2678 case v128CmpTypeI16x8LeS:
2679 result = []bool{
2680 int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16),
2681 int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48),
2682 int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16),
2683 int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48),
2684 }
2685 case v128CmpTypeI16x8LeU:
2686 result = []bool{
2687 uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16),
2688 uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48),
2689 uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16),
2690 uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48),
2691 }
2692 case v128CmpTypeI16x8GeS:
2693 result = []bool{
2694 int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16),
2695 int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48),
2696 int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16),
2697 int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48),
2698 }
2699 case v128CmpTypeI16x8GeU:
2700 result = []bool{
2701 uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16),
2702 uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48),
2703 uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16),
2704 uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48),
2705 }
2706 case v128CmpTypeI32x4Eq:
2707 result = []bool{
2708 uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32),
2709 uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32),
2710 }
2711 case v128CmpTypeI32x4Ne:
2712 result = []bool{
2713 uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32),
2714 uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32),
2715 }
2716 case v128CmpTypeI32x4LtS:
2717 result = []bool{
2718 int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32),
2719 int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32),
2720 }
2721 case v128CmpTypeI32x4LtU:
2722 result = []bool{
2723 uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32),
2724 uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32),
2725 }
2726 case v128CmpTypeI32x4GtS:
2727 result = []bool{
2728 int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32),
2729 int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32),
2730 }
2731 case v128CmpTypeI32x4GtU:
2732 result = []bool{
2733 uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32),
2734 uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32),
2735 }
2736 case v128CmpTypeI32x4LeS:
2737 result = []bool{
2738 int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32),
2739 int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32),
2740 }
2741 case v128CmpTypeI32x4LeU:
2742 result = []bool{
2743 uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32),
2744 uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32),
2745 }
2746 case v128CmpTypeI32x4GeS:
2747 result = []bool{
2748 int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32),
2749 int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32),
2750 }
2751 case v128CmpTypeI32x4GeU:
2752 result = []bool{
2753 uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32),
2754 uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32),
2755 }
2756 case v128CmpTypeI64x2Eq:
2757 result = []bool{x1Lo == x2Lo, x1Hi == x2Hi}
2758 case v128CmpTypeI64x2Ne:
2759 result = []bool{x1Lo != x2Lo, x1Hi != x2Hi}
2760 case v128CmpTypeI64x2LtS:
2761 result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)}
2762 case v128CmpTypeI64x2GtS:
2763 result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)}
2764 case v128CmpTypeI64x2LeS:
2765 result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)}
2766 case v128CmpTypeI64x2GeS:
2767 result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)}
2768 case v128CmpTypeF32x4Eq:
2769 result = []bool{
2770 math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)),
2771 math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)),
2772 math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)),
2773 math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)),
2774 }
2775 case v128CmpTypeF32x4Ne:
2776 result = []bool{
2777 math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)),
2778 math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)),
2779 math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)),
2780 math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)),
2781 }
2782 case v128CmpTypeF32x4Lt:
2783 result = []bool{
2784 math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)),
2785 math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)),
2786 math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)),
2787 math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)),
2788 }
2789 case v128CmpTypeF32x4Gt:
2790 result = []bool{
2791 math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)),
2792 math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)),
2793 math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)),
2794 math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)),
2795 }
2796 case v128CmpTypeF32x4Le:
2797 result = []bool{
2798 math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)),
2799 math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)),
2800 math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)),
2801 math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)),
2802 }
2803 case v128CmpTypeF32x4Ge:
2804 result = []bool{
2805 math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)),
2806 math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)),
2807 math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)),
2808 math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)),
2809 }
2810 case v128CmpTypeF64x2Eq:
2811 result = []bool{
2812 math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo),
2813 math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi),
2814 }
2815 case v128CmpTypeF64x2Ne:
2816 result = []bool{
2817 math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo),
2818 math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi),
2819 }
2820 case v128CmpTypeF64x2Lt:
2821 result = []bool{
2822 math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo),
2823 math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi),
2824 }
2825 case v128CmpTypeF64x2Gt:
2826 result = []bool{
2827 math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo),
2828 math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi),
2829 }
2830 case v128CmpTypeF64x2Le:
2831 result = []bool{
2832 math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo),
2833 math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi),
2834 }
2835 case v128CmpTypeF64x2Ge:
2836 result = []bool{
2837 math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo),
2838 math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi),
2839 }
2840 }
2841
2842 var retLo, retHi uint64
2843 laneNum := len(result)
2844 switch laneNum {
2845 case 16:
2846 for i, b := range result {
2847 if b {
2848 if i < 8 {
2849 retLo |= 0xff << (i * 8)
2850 } else {
2851 retHi |= 0xff << ((i - 8) * 8)
2852 }
2853 }
2854 }
2855 case 8:
2856 for i, b := range result {
2857 if b {
2858 if i < 4 {
2859 retLo |= 0xffff << (i * 16)
2860 } else {
2861 retHi |= 0xffff << ((i - 4) * 16)
2862 }
2863 }
2864 }
2865 case 4:
2866 for i, b := range result {
2867 if b {
2868 if i < 2 {
2869 retLo |= 0xffff_ffff << (i * 32)
2870 } else {
2871 retHi |= 0xffff_ffff << ((i - 2) * 32)
2872 }
2873 }
2874 }
2875 case 2:
2876 if result[0] {
2877 retLo = ^uint64(0)
2878 }
2879 if result[1] {
2880 retHi = ^uint64(0)
2881 }
2882 }
2883
2884 ce.pushValue(retLo)
2885 ce.pushValue(retHi)
2886 frame.pc++
2887 case operationKindV128AddSat:
2888 x2hi, x2Lo := ce.popValue(), ce.popValue()
2889 x1hi, x1Lo := ce.popValue(), ce.popValue()
2890
2891 var retLo, retHi uint64
2892
2893 // Lane-wise addition while saturating the overflowing values.
2894 // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition
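			// op.B3 selects signed versus unsigned saturation.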
2895 switch op.B1 {
2896 case shapeI8x16:
2897 for i := 0; i < 16; i++ {
2898 var v, w byte
2899 if i < 8 {
2900 v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
2901 } else {
2902 v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
2903 }
2904
2905 var uv uint64
					if op.B3 { // signed
						if added := int64(int8(v)) + int64(int8(w)); added < math.MinInt8 {
							uv = uint64(byte(0x80))
						} else if added > math.MaxInt8 {
							uv = uint64(byte(0x7f))
						} else {
							uv = uint64(byte(int8(added)))
						}
					} else {
						if added := int64(v) + int64(w); added < 0 {
							uv = uint64(byte(0))
						} else if added > math.MaxUint8 {
							uv = uint64(byte(0xff))
						} else {
							uv = uint64(byte(added))
						}
					}
2923
					if i < 8 { // The first 8 lanes live in the lower 64 bits.
2925 retLo |= uv << (i * 8)
2926 } else {
2927 retHi |= uv << ((i - 8) * 8)
2928 }
2929 }
2930 case shapeI16x8:
2931 for i := 0; i < 8; i++ {
2932 var v, w uint16
2933 if i < 4 {
2934 v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
2935 } else {
2936 v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
2937 }
2938
2939 var uv uint64
2940 if op.B3 { // signed
2941 if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 {
2942 uv = uint64(uint16(0x8000))
2943 } else if added > math.MaxInt16 {
2944 uv = uint64(uint16(0x7fff))
2945 } else {
2946 uv = uint64(uint16(int16(added)))
2947 }
2948 } else {
2949 if added := int64(v) + int64(w); added < 0 {
2950 uv = uint64(uint16(0))
2951 } else if added > math.MaxUint16 {
2952 uv = uint64(uint16(0xffff))
2953 } else {
2954 uv = uint64(uint16(added))
2955 }
2956 }
2957
					if i < 4 { // The first 4 lanes live in the lower 64 bits.
2959 retLo |= uv << (i * 16)
2960 } else {
2961 retHi |= uv << ((i - 4) * 16)
2962 }
2963 }
2964 }
2965
2966 ce.pushValue(retLo)
2967 ce.pushValue(retHi)
2968 frame.pc++
2969 case operationKindV128SubSat:
2970 x2hi, x2Lo := ce.popValue(), ce.popValue()
2971 x1hi, x1Lo := ce.popValue(), ce.popValue()
2972
2973 var retLo, retHi uint64
2974
2975 // Lane-wise subtraction while saturating the overflowing values.
2976 // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction
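			// As in the saturating addition above, op.B3 selects signed versus unsigned saturation.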
2977 switch op.B1 {
2978 case shapeI8x16:
2979 for i := 0; i < 16; i++ {
2980 var v, w byte
2981 if i < 8 {
2982 v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8))
2983 } else {
2984 v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8))
2985 }
2986
2987 var uv uint64
2988 if op.B3 { // signed
2989 if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 {
2990 uv = uint64(byte(0x80))
2991 } else if subbed > math.MaxInt8 {
2992 uv = uint64(byte(0x7f))
2993 } else {
2994 uv = uint64(byte(int8(subbed)))
2995 }
2996 } else {
2997 if subbed := int64(v) - int64(w); subbed < 0 {
2998 uv = uint64(byte(0))
2999 } else if subbed > math.MaxUint8 {
3000 uv = uint64(byte(0xff))
3001 } else {
3002 uv = uint64(byte(subbed))
3003 }
3004 }
3005
3006 if i < 8 {
3007 retLo |= uv << (i * 8)
3008 } else {
3009 retHi |= uv << ((i - 8) * 8)
3010 }
3011 }
3012 case shapeI16x8:
3013 for i := 0; i < 8; i++ {
3014 var v, w uint16
3015 if i < 4 {
3016 v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16))
3017 } else {
3018 v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16))
3019 }
3020
3021 var uv uint64
3022 if op.B3 { // signed
3023 if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 {
3024 uv = uint64(uint16(0x8000))
3025 } else if subbed > math.MaxInt16 {
3026 uv = uint64(uint16(0x7fff))
3027 } else {
3028 uv = uint64(uint16(int16(subbed)))
3029 }
3030 } else {
3031 if subbed := int64(v) - int64(w); subbed < 0 {
3032 uv = uint64(uint16(0))
3033 } else if subbed > math.MaxUint16 {
3034 uv = uint64(uint16(0xffff))
3035 } else {
3036 uv = uint64(uint16(subbed))
3037 }
3038 }
3039
3040 if i < 4 {
3041 retLo |= uv << (i * 16)
3042 } else {
3043 retHi |= uv << ((i - 4) * 16)
3044 }
3045 }
3046 }
3047
3048 ce.pushValue(retLo)
3049 ce.pushValue(retHi)
3050 frame.pc++
3051 case operationKindV128Mul:
3052 x2hi, x2lo := ce.popValue(), ce.popValue()
3053 x1hi, x1lo := ce.popValue(), ce.popValue()
3054 var retLo, retHi uint64
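			// Note: the SIMD spec defines no i8x16.mul, so shapeI8x16 is not handled here.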
3055 switch op.B1 {
3056 case shapeI16x8:
3057 retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) |
3058 (uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48)
3059 retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) |
3060 (uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48)
3061 case shapeI32x4:
3062 retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32)
3063 retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32)
3064 case shapeI64x2:
3065 retHi = x1hi * x2hi
3066 retLo = x1lo * x2lo
3067 case shapeF32x4:
3068 retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
3069 retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
3070 case shapeF64x2:
3071 retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi))
3072 retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo))
3073 }
3074 ce.pushValue(retLo)
3075 ce.pushValue(retHi)
3076 frame.pc++
3077 case operationKindV128Div:
3078 x2hi, x2lo := ce.popValue(), ce.popValue()
3079 x1hi, x1lo := ce.popValue(), ce.popValue()
3080 var retLo, retHi uint64
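			// v128 division is defined only for the float shapes, so anything other than f64x2 is f32x4.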
3081 if op.B1 == shapeF64x2 {
3082 retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi))
3083 retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo))
3084 } else {
3085 retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
3086 retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
3087 }
3088 ce.pushValue(retLo)
3089 ce.pushValue(retHi)
3090 frame.pc++
3091 case operationKindV128Neg:
3092 hi, lo := ce.popValue(), ce.popValue()
3093 switch op.B1 {
3094 case shapeI8x16:
3095 lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) |
3096 (uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) |
3097 (uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) |
3098 (uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56)
3099 hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) |
3100 (uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) |
3101 (uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) |
3102 (uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56)
3103 case shapeI16x8:
3104 hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) |
3105 (uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48)
3106 lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) |
3107 (uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48)
3108 case shapeI32x4:
3109 hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32)
3110 lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32)
3111 case shapeI64x2:
3112 hi = -hi
3113 lo = -lo
3114 case shapeF32x4:
3115 hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) |
3116 (uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32)
3117 lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) |
3118 (uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32)
3119 case shapeF64x2:
3120 hi = math.Float64bits(-math.Float64frombits(hi))
3121 lo = math.Float64bits(-math.Float64frombits(lo))
3122 }
3123 ce.pushValue(lo)
3124 ce.pushValue(hi)
3125 frame.pc++
3126 case operationKindV128Sqrt:
3127 hi, lo := ce.popValue(), ce.popValue()
3128 if op.B1 == shapeF64x2 {
3129 hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi)))
3130 lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo)))
3131 } else {
3132 hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) |
3133 (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32)
3134 lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) |
3135 (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32)
3136 }
3137 ce.pushValue(lo)
3138 ce.pushValue(hi)
3139 frame.pc++
3140 case operationKindV128Abs:
3141 hi, lo := ce.popValue(), ce.popValue()
3142 switch op.B1 {
3143 case shapeI8x16:
3144 lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) |
3145 (uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) |
3146 (uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) |
3147 (uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56)
3148 hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) |
3149 (uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) |
3150 (uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) |
3151 (uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56)
3152 case shapeI16x8:
3153 hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) |
3154 (uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48)
3155 lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) |
3156 (uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48)
3157 case shapeI32x4:
3158 hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32)
3159 lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32)
3160 case shapeI64x2:
3161 if int64(hi) < 0 {
3162 hi = -hi
3163 }
3164 if int64(lo) < 0 {
3165 lo = -lo
3166 }
3167 case shapeF32x4:
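				// Clear the sign bit (bit 31) of each float32 lane packed in the 64-bit half.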
3168 hi = hi &^ (1<<31 | 1<<63)
3169 lo = lo &^ (1<<31 | 1<<63)
3170 case shapeF64x2:
3171 hi = hi &^ (1 << 63)
3172 lo = lo &^ (1 << 63)
3173 }
3174 ce.pushValue(lo)
3175 ce.pushValue(hi)
3176 frame.pc++
3177 case operationKindV128Popcnt:
3178 hi, lo := ce.popValue(), ce.popValue()
3179 var retLo, retHi uint64
3180 for i := 0; i < 16; i++ {
3181 var v byte
3182 if i < 8 {
3183 v = byte(lo >> (i * 8))
3184 } else {
3185 v = byte(hi >> ((i - 8) * 8))
3186 }
3187
				// Count the set bits in this 8-bit lane.
				cnt := uint64(bits.OnesCount8(v))
3194
3195 if i < 8 {
3196 retLo |= cnt << (i * 8)
3197 } else {
3198 retHi |= cnt << ((i - 8) * 8)
3199 }
3200 }
3201 ce.pushValue(retLo)
3202 ce.pushValue(retHi)
3203 frame.pc++
3204 case operationKindV128Min:
3205 x2hi, x2lo := ce.popValue(), ce.popValue()
3206 x1hi, x1lo := ce.popValue(), ce.popValue()
3207 var retLo, retHi uint64
3208 switch op.B1 {
3209 case shapeI8x16:
3210 if op.B3 { // signed
3211 retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) |
3212 uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
3213 uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
3214 uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
3215 retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) |
3216 uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
3217 uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
3218 uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
3219 } else {
3220 retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) |
3221 uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
3222 uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
3223 uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
3224 retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) |
3225 uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
3226 uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
3227 uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
3228 }
3229 case shapeI16x8:
3230 if op.B3 { // signed
3231 retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) |
3232 uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
3233 uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
3234 uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
3235 retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) |
3236 uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
3237 uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
3238 uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
3239 } else {
3240 retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) |
3241 uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
3242 uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
3243 uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
3244 retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) |
3245 uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
3246 uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
3247 uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
3248 }
3249 case shapeI32x4:
3250 if op.B3 { // signed
3251 retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) |
3252 uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
3253 retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) |
3254 uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
3255 } else {
3256 retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) |
3257 uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
3258 retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) |
3259 uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
3260 }
3261 case shapeF32x4:
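				// wasmCompatMin32bits / moremath.WasmCompatMin64 implement the Wasm-defined minimum
				// (NaN-propagating, and treating -0 as smaller than +0).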
3262 retHi = wasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) |
3263 wasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
3264 retLo = wasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) |
3265 wasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
3266 case shapeF64x2:
3267 retHi = math.Float64bits(moremath.WasmCompatMin64(
3268 math.Float64frombits(x1hi),
3269 math.Float64frombits(x2hi),
3270 ))
3271 retLo = math.Float64bits(moremath.WasmCompatMin64(
3272 math.Float64frombits(x1lo),
3273 math.Float64frombits(x2lo),
3274 ))
3275 }
3276 ce.pushValue(retLo)
3277 ce.pushValue(retHi)
3278 frame.pc++
3279 case operationKindV128Max:
			x2hi, x2lo := ce.popValue(), ce.popValue()
			x1hi, x1lo := ce.popValue(), ce.popValue()
			var retLo, retHi uint64
			switch op.B1 {
			case shapeI8x16:
				if op.B3 { // signed
					retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) |
						uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
						uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
						uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
					retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) |
						uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
						uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
						uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
				} else {
					retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) |
						uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
						uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
						uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
					retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) |
						uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
						uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
						uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
				}
			case shapeI16x8:
				if op.B3 { // signed
					retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) |
						uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
						uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
						uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
					retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) |
						uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
						uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
						uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
				} else {
					retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) |
						uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
						uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
						uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
					retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) |
						uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
						uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
						uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
				}
			case shapeI32x4:
				if op.B3 { // signed
					retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) |
						uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
					retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) |
						uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
				} else {
					retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) |
						uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32
					retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) |
						uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32
				}
			case shapeF32x4:
				retHi = wasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) |
					wasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32
				retLo = wasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) |
					wasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32
			case shapeF64x2:
				retHi = math.Float64bits(moremath.WasmCompatMax64(
					math.Float64frombits(x1hi),
					math.Float64frombits(x2hi),
				))
				retLo = math.Float64bits(moremath.WasmCompatMax64(
					math.Float64frombits(x1lo),
					math.Float64frombits(x2lo),
				))
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128AvgrU:
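			// Lane-wise unsigned rounding average: each result lane is (v1+v2+1)/2 computed in a
			// wider integer type so the sum cannot overflow. Only the i8x16 and i16x8 shapes are
			// defined for this instruction.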
			x2hi, x2lo := ce.popValue(), ce.popValue()
			x1hi, x1lo := ce.popValue(), ce.popValue()
			var retLo, retHi uint64
			switch op.B1 {
			case shapeI8x16:
				retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) |
					uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 |
					uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 |
					uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48
				retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) |
					uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 |
					uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 |
					uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48
			case shapeI16x8:
				retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) |
					uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 |
					uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 |
					uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48
				retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) |
					uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 |
					uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 |
					uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Pmin:
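			// Pseudo-minimum: lane-wise "x2 < x1 ? x2 : x1" using the spec's flt comparison, so
			// whenever either operand is NaN the first operand (x1) is returned unchanged.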
			x2hi, x2lo := ce.popValue(), ce.popValue()
			x1hi, x1lo := ce.popValue(), ce.popValue()
			var retLo, retHi uint64
			if op.B1 == shapeF32x4 {
				if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) {
					retLo = x2lo & 0x00000000_ffffffff
				} else {
					retLo = x1lo & 0x00000000_ffffffff
				}
				if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) {
					retLo |= x2lo & 0xffffffff_00000000
				} else {
					retLo |= x1lo & 0xffffffff_00000000
				}
				if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) {
					retHi = x2hi & 0x00000000_ffffffff
				} else {
					retHi = x1hi & 0x00000000_ffffffff
				}
				if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) {
					retHi |= x2hi & 0xffffffff_00000000
				} else {
					retHi |= x1hi & 0xffffffff_00000000
				}
			} else {
				if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) {
					retLo = x2lo
				} else {
					retLo = x1lo
				}
				if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) {
					retHi = x2hi
				} else {
					retHi = x1hi
				}
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Pmax:
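			// Pseudo-maximum: lane-wise "x1 < x2 ? x2 : x1", again returning x1 unchanged
			// whenever either operand is NaN.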
			x2hi, x2lo := ce.popValue(), ce.popValue()
			x1hi, x1lo := ce.popValue(), ce.popValue()
			var retLo, retHi uint64
			if op.B1 == shapeF32x4 {
				if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) {
					retLo = x2lo & 0x00000000_ffffffff
				} else {
					retLo = x1lo & 0x00000000_ffffffff
				}
				if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) {
					retLo |= x2lo & 0xffffffff_00000000
				} else {
					retLo |= x1lo & 0xffffffff_00000000
				}
				if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) {
					retHi = x2hi & 0x00000000_ffffffff
				} else {
					retHi = x1hi & 0x00000000_ffffffff
				}
				if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) {
					retHi |= x2hi & 0xffffffff_00000000
				} else {
					retHi |= x1hi & 0xffffffff_00000000
				}
			} else {
				if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) {
					retLo = x2lo
				} else {
					retLo = x1lo
				}
				if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) {
					retHi = x2hi
				} else {
					retHi = x1hi
				}
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Ceil:
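			// Lane-wise ceiling. The same pattern is used by the Floor/Trunc/Nearest cases below:
			// f32x4 rounds each 32-bit lane, f64x2 rounds each 64-bit lane, and the
			// moremath.WasmCompat* helpers handle NaN lanes per the WebAssembly spec rather than
			// Go's default rounding behavior.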
			hi, lo := ce.popValue(), ce.popValue()
			if op.B1 == shapeF32x4 {
				lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) |
					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32)
				hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) |
					(uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32)
			} else {
				lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo)))
				hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi)))
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case operationKindV128Floor:
			hi, lo := ce.popValue(), ce.popValue()
			if op.B1 == shapeF32x4 {
				lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) |
					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32)
				hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) |
					(uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32)
			} else {
				lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo)))
				hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi)))
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case operationKindV128Trunc:
			hi, lo := ce.popValue(), ce.popValue()
			if op.B1 == shapeF32x4 {
				lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) |
					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32)
				hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) |
					(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32)
			} else {
				lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo)))
				hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi)))
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case operationKindV128Nearest:
			hi, lo := ce.popValue(), ce.popValue()
			if op.B1 == shapeF32x4 {
				lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) |
					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32)
				hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) |
					(uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32)
			} else {
				lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo)))
				hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi)))
			}
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case operationKindV128Extend:
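			// Widen half of the input lanes to the next lane size: op.B3 selects the low (true)
			// or high (false) 64 bits as the source, and op.B2 == 1 requests sign extension
			// rather than zero extension.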
			hi, lo := ce.popValue(), ce.popValue()
			var origin uint64
			if op.B3 { // use lower 64 bits
				origin = lo
			} else {
				origin = hi
			}

			signed := op.B2 == 1

			var retHi, retLo uint64
			switch op.B1 {
			case shapeI8x16:
				for i := 0; i < 8; i++ {
					v8 := byte(origin >> (i * 8))

					var v16 uint16
					if signed {
						v16 = uint16(int8(v8))
					} else {
						v16 = uint16(v8)
					}

					if i < 4 {
						retLo |= uint64(v16) << (i * 16)
					} else {
						retHi |= uint64(v16) << ((i - 4) * 16)
					}
				}
			case shapeI16x8:
				for i := 0; i < 4; i++ {
					v16 := uint16(origin >> (i * 16))

					var v32 uint32
					if signed {
						v32 = uint32(int16(v16))
					} else {
						v32 = uint32(v16)
					}

					if i < 2 {
						retLo |= uint64(v32) << (i * 32)
					} else {
						retHi |= uint64(v32) << ((i - 2) * 32)
					}
				}
			case shapeI32x4:
				v32Lo := uint32(origin)
				v32Hi := uint32(origin >> 32)
				if signed {
					retLo = uint64(int32(v32Lo))
					retHi = uint64(int32(v32Hi))
				} else {
					retLo = uint64(v32Lo)
					retHi = uint64(v32Hi)
				}
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128ExtMul:
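			// Extended multiply: take the low (op.B3) or high half of each operand, widen the
			// lanes, and multiply pairwise so the full product fits in the doubled lane width.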
			x2Hi, x2Lo := ce.popValue(), ce.popValue()
			x1Hi, x1Lo := ce.popValue(), ce.popValue()
			var x1, x2 uint64
			if op.B3 { // use lower 64 bits
				x1, x2 = x1Lo, x2Lo
			} else {
				x1, x2 = x1Hi, x2Hi
			}

			signed := op.B2 == 1

			var retLo, retHi uint64
			switch op.B1 {
			case shapeI8x16:
				for i := 0; i < 8; i++ {
					v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8))

					var v16 uint16
					if signed {
						v16 = uint16(int16(int8(v1)) * int16(int8(v2)))
					} else {
						v16 = uint16(v1) * uint16(v2)
					}

					if i < 4 {
						retLo |= uint64(v16) << (i * 16)
					} else {
						retHi |= uint64(v16) << ((i - 4) * 16)
					}
				}
			case shapeI16x8:
				for i := 0; i < 4; i++ {
					v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16))

					var v32 uint32
					if signed {
						v32 = uint32(int32(int16(v1)) * int32(int16(v2)))
					} else {
						v32 = uint32(v1) * uint32(v2)
					}

					if i < 2 {
						retLo |= uint64(v32) << (i * 32)
					} else {
						retHi |= uint64(v32) << ((i - 2) * 32)
					}
				}
			case shapeI32x4:
				v1Lo, v2Lo := uint32(x1), uint32(x2)
				v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32)
				if signed {
					retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo)))
					retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi)))
				} else {
					retLo = uint64(v1Lo) * uint64(v2Lo)
					retHi = uint64(v1Hi) * uint64(v2Hi)
				}
			}

			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Q15mulrSatS:
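			// Q15 rounding, saturating multiply: each pair of i16 lanes is multiplied as Q15
			// fixed point, rounded by adding 0x4000 before the >>15, and saturated to the int16
			// range. For example, 0x4000*0x4000 (0.5*0.5) yields (0x10000000+0x4000)>>15 = 0x2000.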
			x2hi, x2Lo := ce.popValue(), ce.popValue()
			x1hi, x1Lo := ce.popValue(), ce.popValue()
			var retLo, retHi uint64
			for i := 0; i < 8; i++ {
				var v, w int16
				if i < 4 {
					v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16)))
				} else {
					v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16)))
				}

				var uv uint64
				// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication
				if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 {
					uv = uint64(uint16(0x8000))
				} else if calc > math.MaxInt16 {
					uv = uint64(uint16(0x7fff))
				} else {
					uv = uint64(uint16(int16(calc)))
				}

				if i < 4 {
					retLo |= uv << (i * 16)
				} else {
					retHi |= uv << ((i - 4) * 16)
				}
			}

			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128ExtAddPairwise:
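			// Pairwise widening add: adjacent lanes are summed into a lane of twice the width,
			// with op.B3 selecting signed versus unsigned widening of the inputs.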
			hi, lo := ce.popValue(), ce.popValue()

			signed := op.B3

			var retLo, retHi uint64
			switch op.B1 {
			case shapeI8x16:
				for i := 0; i < 8; i++ {
					var v1, v2 byte
					if i < 4 {
						v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8))
					} else {
						v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8))
					}

					var v16 uint16
					if signed {
						v16 = uint16(int16(int8(v1)) + int16(int8(v2)))
					} else {
						v16 = uint16(v1) + uint16(v2)
					}

					if i < 4 {
						retLo |= uint64(v16) << (i * 16)
					} else {
						retHi |= uint64(v16) << ((i - 4) * 16)
					}
				}
			case shapeI16x8:
				for i := 0; i < 4; i++ {
					var v1, v2 uint16
					if i < 2 {
						v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16))
					} else {
						v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16))
					}

					var v32 uint32
					if signed {
						v32 = uint32(int32(int16(v1)) + int32(int16(v2)))
					} else {
						v32 = uint32(v1) + uint32(v2)
					}

					if i < 2 {
						retLo |= uint64(v32) << (i * 32)
					} else {
						retHi |= uint64(v32) << ((i - 2) * 32)
					}
				}
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128FloatPromote:
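			// Promote the two low f32 lanes to f64x2; the high 64 bits of the input are ignored.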
			_, toPromote := ce.popValue(), ce.popValue()
			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote)))))
			ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32)))))
			frame.pc++
		case operationKindV128FloatDemote:
			hi, lo := ce.popValue(), ce.popValue()
			ce.pushValue(
				uint64(math.Float32bits(float32(math.Float64frombits(lo)))) |
					(uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32),
			)
			ce.pushValue(0)
			frame.pc++
		case operationKindV128FConvertFromI:
			hi, lo := ce.popValue(), ce.popValue()
			v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32)
			signed := op.B3

			var retLo, retHi uint64
			switch op.B1 { // Destination shape.
			case shapeF32x4: // f32x4 from signed/unsigned i32x4
				if signed {
					retLo = uint64(math.Float32bits(float32(int32(v1)))) |
						(uint64(math.Float32bits(float32(int32(v2)))) << 32)
					retHi = uint64(math.Float32bits(float32(int32(v3)))) |
						(uint64(math.Float32bits(float32(int32(v4)))) << 32)
				} else {
					retLo = uint64(math.Float32bits(float32(v1))) |
						(uint64(math.Float32bits(float32(v2))) << 32)
					retHi = uint64(math.Float32bits(float32(v3))) |
						(uint64(math.Float32bits(float32(v4))) << 32)
				}
			case shapeF64x2: // f64x2 from signed/unsigned i32x4
				if signed {
					retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2)))
				} else {
					retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2))
				}
			}

			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Narrow:
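			// Saturating narrow: x1's lanes fill the low half of the result and x2's lanes fill
			// the high half, each value clamped to the destination lane's signed (op.B3) or
			// unsigned range.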
			x2Hi, x2Lo := ce.popValue(), ce.popValue()
			x1Hi, x1Lo := ce.popValue(), ce.popValue()
			signed := op.B3

			var retLo, retHi uint64
			switch op.B1 {
			case shapeI16x8: // signed/unsigned i16x8 to i8x16
				for i := 0; i < 8; i++ {
					var v16 uint16
					if i < 4 {
						v16 = uint16(x1Lo >> (i * 16))
					} else {
						v16 = uint16(x1Hi >> ((i - 4) * 16))
					}

					var v byte
					if signed {
						if s := int16(v16); s > math.MaxInt8 {
							v = math.MaxInt8
						} else if s < math.MinInt8 {
							s = math.MinInt8
							v = byte(s)
						} else {
							v = byte(v16)
						}
					} else {
						if s := int16(v16); s > math.MaxUint8 {
							v = math.MaxUint8
						} else if s < 0 {
							v = 0
						} else {
							v = byte(v16)
						}
					}
					retLo |= uint64(v) << (i * 8)
				}
				for i := 0; i < 8; i++ {
					var v16 uint16
					if i < 4 {
						v16 = uint16(x2Lo >> (i * 16))
					} else {
						v16 = uint16(x2Hi >> ((i - 4) * 16))
					}

					var v byte
					if signed {
						if s := int16(v16); s > math.MaxInt8 {
							v = math.MaxInt8
						} else if s < math.MinInt8 {
							s = math.MinInt8
							v = byte(s)
						} else {
							v = byte(v16)
						}
					} else {
						if s := int16(v16); s > math.MaxUint8 {
							v = math.MaxUint8
						} else if s < 0 {
							v = 0
						} else {
							v = byte(v16)
						}
					}
					retHi |= uint64(v) << (i * 8)
				}
			case shapeI32x4: // signed/unsigned i32x4 to i16x8
				for i := 0; i < 4; i++ {
					var v32 uint32
					if i < 2 {
						v32 = uint32(x1Lo >> (i * 32))
					} else {
						v32 = uint32(x1Hi >> ((i - 2) * 32))
					}

					var v uint16
					if signed {
						if s := int32(v32); s > math.MaxInt16 {
							v = math.MaxInt16
						} else if s < math.MinInt16 {
							s = math.MinInt16
							v = uint16(s)
						} else {
							v = uint16(v32)
						}
					} else {
						if s := int32(v32); s > math.MaxUint16 {
							v = math.MaxUint16
						} else if s < 0 {
							v = 0
						} else {
							v = uint16(v32)
						}
					}
					retLo |= uint64(v) << (i * 16)
				}

				for i := 0; i < 4; i++ {
					var v32 uint32
					if i < 2 {
						v32 = uint32(x2Lo >> (i * 32))
					} else {
						v32 = uint32(x2Hi >> ((i - 2) * 32))
					}

					var v uint16
					if signed {
						if s := int32(v32); s > math.MaxInt16 {
							v = math.MaxInt16
						} else if s < math.MinInt16 {
							s = math.MinInt16
							v = uint16(s)
						} else {
							v = uint16(v32)
						}
					} else {
						if s := int32(v32); s > math.MaxUint16 {
							v = math.MaxUint16
						} else if s < 0 {
							v = 0
						} else {
							v = uint16(v32)
						}
					}
					retHi |= uint64(v) << (i * 16)
				}
			}
			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindV128Dot:
			x2Hi, x2Lo := ce.popValue(), ce.popValue()
			x1Hi, x1Lo := ce.popValue(), ce.popValue()
			lo, hi := v128Dot(x1Hi, x1Lo, x2Hi, x2Lo)
			ce.pushValue(lo)
			ce.pushValue(hi)
			frame.pc++
		case operationKindV128ITruncSatFromF:
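			// Truncating, saturating float-to-integer conversion: NaN lanes become zero and
			// out-of-range values clamp to the destination integer bounds. For the f64x2 source
			// the two results land in the low half of the i32x4 and the high half stays zero.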
			hi, lo := ce.popValue(), ce.popValue()
			signed := op.B3
			var retLo, retHi uint64

			switch op.B1 {
			case shapeF32x4: // f32x4 to i32x4
				for i, f64 := range [4]float64{
					math.Trunc(float64(math.Float32frombits(uint32(lo)))),
					math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))),
					math.Trunc(float64(math.Float32frombits(uint32(hi)))),
					math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))),
				} {

					var v uint32
					if math.IsNaN(f64) {
						v = 0
					} else if signed {
						if f64 < math.MinInt32 {
							f64 = math.MinInt32
						} else if f64 > math.MaxInt32 {
							f64 = math.MaxInt32
						}
						v = uint32(int32(f64))
					} else {
						if f64 < 0 {
							f64 = 0
						} else if f64 > math.MaxUint32 {
							f64 = math.MaxUint32
						}
						v = uint32(f64)
					}

					if i < 2 {
						retLo |= uint64(v) << (i * 32)
					} else {
						retHi |= uint64(v) << ((i - 2) * 32)
					}
				}

			case shapeF64x2: // f64x2 to i32x4
				for i, f := range [2]float64{
					math.Trunc(math.Float64frombits(lo)),
					math.Trunc(math.Float64frombits(hi)),
				} {
					var v uint32
					if math.IsNaN(f) {
						v = 0
					} else if signed {
						if f < math.MinInt32 {
							f = math.MinInt32
						} else if f > math.MaxInt32 {
							f = math.MaxInt32
						}
						v = uint32(int32(f))
					} else {
						if f < 0 {
							f = 0
						} else if f > math.MaxUint32 {
							f = math.MaxUint32
						}
						v = uint32(f)
					}

					retLo |= uint64(v) << (i * 32)
				}
			}

			ce.pushValue(retLo)
			ce.pushValue(retHi)
			frame.pc++
		case operationKindAtomicMemoryWait:
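			// memory.atomic.wait32/wait64: requires shared memory and an aligned, in-bounds
			// address. The thread blocks until it is notified, the relative timeout elapses, or
			// the value in memory already differs from the expected value; Wait32/Wait64 return
			// the corresponding result code, which is pushed onto the stack.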
			timeout := int64(ce.popValue())
			exp := ce.popValue()
			offset := ce.popMemoryOffset(op)
			// Runtime instead of validation error because the spec intends to allow binaries to include
			// such instructions as long as they are not executed.
			if !memoryInst.Shared {
				panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
			}

			switch unsignedType(op.B1) {
			case unsignedTypeI32:
				if offset%4 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				if int(offset) > len(memoryInst.Buffer)-4 {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
					mem.Mux.Lock()
					defer mem.Mux.Unlock()
					value, _ := mem.ReadUint32Le(offset)
					return value
				}))
			case unsignedTypeI64:
				if offset%8 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				if int(offset) > len(memoryInst.Buffer)-8 {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				ce.pushValue(memoryInst.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
					mem.Mux.Lock()
					defer mem.Mux.Unlock()
					value, _ := mem.ReadUint64Le(offset)
					return value
				}))
			}
			frame.pc++
		case operationKindAtomicMemoryNotify:
			count := ce.popValue()
			offset := ce.popMemoryOffset(op)
			if offset%4 != 0 {
				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
			}
			// Just a bounds check
			if offset >= memoryInst.Size() {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			res := memoryInst.Notify(offset, uint32(count))
			ce.pushValue(uint64(res))
			frame.pc++
		case operationKindAtomicFence:
			// A memory is not required for a fence-only module.
			if memoryInst != nil {
				// An empty critical section acts as a full memory barrier, which is all a fence
				// requires. There are likely no spectests or defined behaviors that confirm this yet.
				memoryInst.Mux.Lock()
				memoryInst.Mux.Unlock() //nolint:staticcheck
			}
			frame.pc++
		case operationKindAtomicLoad:
			offset := ce.popMemoryOffset(op)
			switch unsignedType(op.B1) {
			case unsignedTypeI32:
				if offset%4 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				val, ok := memoryInst.ReadUint32Le(offset)
				memoryInst.Mux.Unlock()
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				ce.pushValue(uint64(val))
			case unsignedTypeI64:
				if offset%8 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				val, ok := memoryInst.ReadUint64Le(offset)
				memoryInst.Mux.Unlock()
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				ce.pushValue(val)
			}
			frame.pc++
		case operationKindAtomicLoad8:
			offset := ce.popMemoryOffset(op)
			memoryInst.Mux.Lock()
			val, ok := memoryInst.ReadByte(offset)
			memoryInst.Mux.Unlock()
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			ce.pushValue(uint64(val))
			frame.pc++
		case operationKindAtomicLoad16:
			offset := ce.popMemoryOffset(op)
			if offset%2 != 0 {
				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
			}
			memoryInst.Mux.Lock()
			val, ok := memoryInst.ReadUint16Le(offset)
			memoryInst.Mux.Unlock()
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			ce.pushValue(uint64(val))
			frame.pc++
		case operationKindAtomicStore:
			val := ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch unsignedType(op.B1) {
			case unsignedTypeI32:
				if offset%4 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				ok := memoryInst.WriteUint32Le(offset, uint32(val))
				memoryInst.Mux.Unlock()
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
			case unsignedTypeI64:
				if offset%8 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				ok := memoryInst.WriteUint64Le(offset, val)
				memoryInst.Mux.Unlock()
				if !ok {
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
			}
			frame.pc++
		case operationKindAtomicStore8:
			val := byte(ce.popValue())
			offset := ce.popMemoryOffset(op)
			memoryInst.Mux.Lock()
			ok := memoryInst.WriteByte(offset, val)
			memoryInst.Mux.Unlock()
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			frame.pc++
		case operationKindAtomicStore16:
			val := uint16(ce.popValue())
			offset := ce.popMemoryOffset(op)
			if offset%2 != 0 {
				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
			}
			memoryInst.Mux.Lock()
			ok := memoryInst.WriteUint16Le(offset, val)
			memoryInst.Mux.Unlock()
			if !ok {
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			frame.pc++
		case operationKindAtomicRMW:
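			// 32/64-bit atomic read-modify-write: the memory mutex is held across the read, the
			// arithmetic/logical update selected by op.B2, and the write-back, and the old value
			// is pushed as the result.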
			val := ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch unsignedType(op.B1) {
			case unsignedTypeI32:
				if offset%4 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				old, ok := memoryInst.ReadUint32Le(offset)
				if !ok {
					memoryInst.Mux.Unlock()
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				var newVal uint32
				switch atomicArithmeticOp(op.B2) {
				case atomicArithmeticOpAdd:
					newVal = old + uint32(val)
				case atomicArithmeticOpSub:
					newVal = old - uint32(val)
				case atomicArithmeticOpAnd:
					newVal = old & uint32(val)
				case atomicArithmeticOpOr:
					newVal = old | uint32(val)
				case atomicArithmeticOpXor:
					newVal = old ^ uint32(val)
				case atomicArithmeticOpNop:
					newVal = uint32(val)
				}
				memoryInst.WriteUint32Le(offset, newVal)
				memoryInst.Mux.Unlock()
				ce.pushValue(uint64(old))
			case unsignedTypeI64:
				if offset%8 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				old, ok := memoryInst.ReadUint64Le(offset)
				if !ok {
					memoryInst.Mux.Unlock()
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				var newVal uint64
				switch atomicArithmeticOp(op.B2) {
				case atomicArithmeticOpAdd:
					newVal = old + val
				case atomicArithmeticOpSub:
					newVal = old - val
				case atomicArithmeticOpAnd:
					newVal = old & val
				case atomicArithmeticOpOr:
					newVal = old | val
				case atomicArithmeticOpXor:
					newVal = old ^ val
				case atomicArithmeticOpNop:
					newVal = val
				}
				memoryInst.WriteUint64Le(offset, newVal)
				memoryInst.Mux.Unlock()
				ce.pushValue(old)
			}
			frame.pc++
		case operationKindAtomicRMW8:
			val := ce.popValue()
			offset := ce.popMemoryOffset(op)
			memoryInst.Mux.Lock()
			old, ok := memoryInst.ReadByte(offset)
			if !ok {
				memoryInst.Mux.Unlock()
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			arg := byte(val)
			var newVal byte
			switch atomicArithmeticOp(op.B2) {
			case atomicArithmeticOpAdd:
				newVal = old + arg
			case atomicArithmeticOpSub:
				newVal = old - arg
			case atomicArithmeticOpAnd:
				newVal = old & arg
			case atomicArithmeticOpOr:
				newVal = old | arg
			case atomicArithmeticOpXor:
				newVal = old ^ arg
			case atomicArithmeticOpNop:
				newVal = arg
			}
			memoryInst.WriteByte(offset, newVal)
			memoryInst.Mux.Unlock()
			ce.pushValue(uint64(old))
			frame.pc++
		case operationKindAtomicRMW16:
			val := ce.popValue()
			offset := ce.popMemoryOffset(op)
			if offset%2 != 0 {
				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
			}
			memoryInst.Mux.Lock()
			old, ok := memoryInst.ReadUint16Le(offset)
			if !ok {
				memoryInst.Mux.Unlock()
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			arg := uint16(val)
			var newVal uint16
			switch atomicArithmeticOp(op.B2) {
			case atomicArithmeticOpAdd:
				newVal = old + arg
			case atomicArithmeticOpSub:
				newVal = old - arg
			case atomicArithmeticOpAnd:
				newVal = old & arg
			case atomicArithmeticOpOr:
				newVal = old | arg
			case atomicArithmeticOpXor:
				newVal = old ^ arg
			case atomicArithmeticOpNop:
				newVal = arg
			}
			memoryInst.WriteUint16Le(offset, newVal)
			memoryInst.Mux.Unlock()
			ce.pushValue(uint64(old))
			frame.pc++
		case operationKindAtomicRMWCmpxchg:
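			// Atomic compare-and-exchange: under the memory mutex, the replacement is stored only
			// when the current value equals the expected one, and the old value is pushed either way.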
			rep := ce.popValue()
			exp := ce.popValue()
			offset := ce.popMemoryOffset(op)
			switch unsignedType(op.B1) {
			case unsignedTypeI32:
				if offset%4 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				old, ok := memoryInst.ReadUint32Le(offset)
				if !ok {
					memoryInst.Mux.Unlock()
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				if old == uint32(exp) {
					memoryInst.WriteUint32Le(offset, uint32(rep))
				}
				memoryInst.Mux.Unlock()
				ce.pushValue(uint64(old))
			case unsignedTypeI64:
				if offset%8 != 0 {
					panic(wasmruntime.ErrRuntimeUnalignedAtomic)
				}
				memoryInst.Mux.Lock()
				old, ok := memoryInst.ReadUint64Le(offset)
				if !ok {
					memoryInst.Mux.Unlock()
					panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
				}
				if old == exp {
					memoryInst.WriteUint64Le(offset, rep)
				}
				memoryInst.Mux.Unlock()
				ce.pushValue(old)
			}
			frame.pc++
		case operationKindAtomicRMW8Cmpxchg:
			rep := byte(ce.popValue())
			exp := byte(ce.popValue())
			offset := ce.popMemoryOffset(op)
			memoryInst.Mux.Lock()
			old, ok := memoryInst.ReadByte(offset)
			if !ok {
				memoryInst.Mux.Unlock()
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			if old == exp {
				memoryInst.WriteByte(offset, rep)
			}
			memoryInst.Mux.Unlock()
			ce.pushValue(uint64(old))
			frame.pc++
		case operationKindAtomicRMW16Cmpxchg:
			rep := uint16(ce.popValue())
			exp := uint16(ce.popValue())
			offset := ce.popMemoryOffset(op)
			if offset%2 != 0 {
				panic(wasmruntime.ErrRuntimeUnalignedAtomic)
			}
			memoryInst.Mux.Lock()
			old, ok := memoryInst.ReadUint16Le(offset)
			if !ok {
				memoryInst.Mux.Unlock()
				panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
			}
			if old == exp {
				memoryInst.WriteUint16Le(offset, rep)
			}
			memoryInst.Mux.Unlock()
			ce.pushValue(uint64(old))
			frame.pc++
		default:
			frame.pc++
		}
	}
	ce.popFrame()
}

func wasmCompatMax32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(moremath.WasmCompatMax32(
		math.Float32frombits(v1),
		math.Float32frombits(v2),
	)))
}

func wasmCompatMin32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(moremath.WasmCompatMin32(
		math.Float32frombits(v1),
		math.Float32frombits(v2),
	)))
}

func addFloat32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2)))
}

func subFloat32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2)))
}

func mulFloat32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2)))
}

func divFloat32bits(v1, v2 uint32) uint64 {
	return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2)))
}

// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
func flt32(z1, z2 float32) bool {
	if z1 != z1 || z2 != z2 {
		return false
	} else if z1 == z2 {
		return false
	} else if math.IsInf(float64(z1), 1) {
		return false
	} else if math.IsInf(float64(z1), -1) {
		return true
	} else if math.IsInf(float64(z2), 1) {
		return true
	} else if math.IsInf(float64(z2), -1) {
		return false
	}
	return z1 < z2
}

// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2
func flt64(z1, z2 float64) bool {
	if z1 != z1 || z2 != z2 {
		return false
	} else if z1 == z2 {
		return false
	} else if math.IsInf(z1, 1) {
		return false
	} else if math.IsInf(z1, -1) {
		return true
	} else if math.IsInf(z2, 1) {
		return true
	} else if math.IsInf(z2, -1) {
		return false
	}
	return z1 < z2
}

func i8RoundingAverage(v1, v2 byte) byte {
	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
	return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2)
}

func i16RoundingAverage(v1, v2 uint16) uint16 {
	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average
	return uint16((uint32(v1) + uint32(v2) + 1) / 2)
}

func i8Abs(v byte) byte {
	if i := int8(v); i < 0 {
		return byte(-i)
	} else {
		return byte(i)
	}
}

func i8MaxU(v1, v2 byte) byte {
	if v1 < v2 {
		return v2
	} else {
		return v1
	}
}

func i8MinU(v1, v2 byte) byte {
	if v1 > v2 {
		return v2
	} else {
		return v1
	}
}

func i8MaxS(v1, v2 byte) byte {
	if int8(v1) < int8(v2) {
		return v2
	} else {
		return v1
	}
}

func i8MinS(v1, v2 byte) byte {
	if int8(v1) > int8(v2) {
		return v2
	} else {
		return v1
	}
}

func i16MaxU(v1, v2 uint16) uint16 {
	if v1 < v2 {
		return v2
	} else {
		return v1
	}
}

func i16MinU(v1, v2 uint16) uint16 {
	if v1 > v2 {
		return v2
	} else {
		return v1
	}
}

func i16MaxS(v1, v2 uint16) uint16 {
	if int16(v1) < int16(v2) {
		return v2
	} else {
		return v1
	}
}

func i16MinS(v1, v2 uint16) uint16 {
	if int16(v1) > int16(v2) {
		return v2
	} else {
		return v1
	}
}

func i32MaxU(v1, v2 uint32) uint32 {
	if v1 < v2 {
		return v2
	} else {
		return v1
	}
}

func i32MinU(v1, v2 uint32) uint32 {
	if v1 > v2 {
		return v2
	} else {
		return v1
	}
}

func i32MaxS(v1, v2 uint32) uint32 {
	if int32(v1) < int32(v2) {
		return v2
	} else {
		return v1
	}
}

func i32MinS(v1, v2 uint32) uint32 {
	if int32(v1) > int32(v2) {
		return v2
	} else {
		return v1
	}
}

func i16Abs(v uint16) uint16 {
	if i := int16(v); i < 0 {
		return uint16(-i)
	} else {
		return uint16(i)
	}
}

func i32Abs(v uint32) uint32 {
	if i := int32(v); i < 0 {
		return uint32(-i)
	} else {
		return uint32(i)
	}
}

func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context {
	def, typ := f.definition(), f.funcType

	ce.stackIterator.reset(ce.stack, ce.frames, f)
	fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator)
	ce.stackIterator.clear()
	ce.callNativeFunc(ctx, m, f)
	fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64))
	return ctx
}

// popMemoryOffset pops a memory offset off the stack for use in load and store instructions.
// Because the dynamic address and the static offset are summed as 64-bit values, this checks
// that the result fits in 32 bits before returning it.
func (ce *callEngine) popMemoryOffset(op *unionOperation) uint32 {
	offset := op.U2 + ce.popValue()
	if offset > math.MaxUint32 {
		panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
	}
	return uint32(offset)
}

func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) {
	typ := f.funcType
	paramLen := typ.ParamNumInUint64
	resultLen := typ.ResultNumInUint64
	stackLen := paramLen

	// In the interpreter engine, ce.stack may only have capacity to store
	// parameters. Grow when there are more results than parameters.
	if growLen := resultLen - paramLen; growLen > 0 {
		for i := 0; i < growLen; i++ {
			ce.stack = append(ce.stack, 0)
		}
		stackLen += growLen
	}

	// Pass the stack elements to the go function.
	stack := ce.stack[len(ce.stack)-stackLen:]
	ce.callGoFunc(ctx, m, f, stack)

	// Shrink the stack when there were more parameters than results.
	if shrinkLen := paramLen - resultLen; shrinkLen > 0 {
		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]
	}
}

// v128Dot implements the SIMD dot product (i32x4.dot_i16x8_s): adjacent signed 16-bit lane
// products are summed into 32-bit lanes, with the 128-bit operands passed as (hi, lo) 64-bit halves.
// Note: inlining this function triggers a bug (suspected to be a register-allocation issue in the
// Go compiler) that occurs only when running with -race on arm64 under Go 1.22.
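// As an illustration of the lane layout: with x1Lo = x2Lo = 0x0002_0001 (i16 lanes of 1 and 2 in
// the low 32 bits) and all other lanes zero, r1 = 1*1 and r2 = 2*2, so the low 32 bits of the
// first return value are 1+4 = 5 and everything else is zero.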
func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) {
	r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0))
	r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16))
	r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32))
	r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48))
	r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0))
	r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16))
	r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32))
	r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48))
	return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32), uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32)
}