goat

command module
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 3, 2023 License: Apache-2.0 Imports: 15 Imported by: 0

README

GoAT

Go assembly transpiler for the C programming language.

It helps Go projects take advantage of C compiler optimizations. For example, it can generate SIMD-vectorized functions for Go (refer to How to Use AVX512 in Golang).

Install

go install github.com/gorse-io/goat@latest

Usage

cd example

goat src/mul_to.c -O3 -mavx -mfma -mavx512f -mavx512dq
GoAT transpiles example/src/mul_to.c to two files.

Go function definition file mul_to.go:

//go:build !noasm && amd64
// AUTO-GENERATED BY GOAT -- DO NOT EDIT

package example

import "unsafe"

// mul_to is the Go declaration of the routine implemented in the assembly
// file mul_to.s. a, b and c are pointers to float32 data; n is not a real
// pointer but the element count packed into a pointer-sized argument (the
// caller passes unsafe.Pointer(uintptr(len(a)))).
//
//go:noescape
func mul_to(a, b, c, n unsafe.Pointer)
Go assembly file mul_to.s:
//go:build !noasm && amd64
// AUTO-GENERATED BY GOAT -- DO NOT EDIT

// mul_to(a, b, c, n): element-wise single-precision multiply, c[i] = a[i] * b[i]
// for i in [0, n). The four 8-byte arguments are read from the Go frame and
// moved into DI, SI, DX and CX before the compiler-generated body runs. The
// body is emitted as raw instruction bytes; the trailing comment on each line
// is the x86-64 instruction those bytes encode.
TEXT ·mul_to(SB), $0-32
	MOVQ a+0(FP), DI
	MOVQ b+8(FP), SI
	MOVQ c+16(FP), DX
	MOVQ n+24(FP), CX
	BYTE $0x55               // pushq	%rbp
	WORD $0x8948; BYTE $0xe5 // movq	%rsp, %rbp
	LONG $0xf8e48348         // andq	$-8, %rsp
	WORD $0x8548; BYTE $0xc9 // testq	%rcx, %rcx
	// n <= 0: nothing to do, go straight to the epilogue.
	JLE  LBB0_12
	LONG $0x3ff98348         // cmpq	$63, %rcx
	// n > 63: consider the 64-elements-per-iteration vector loop.
	JA   LBB0_7
	// Otherwise start the scalar path at index 0.
	WORD $0xc031             // xorl	%eax, %eax
	JMP  LBB0_3

// Overlap check: compares the end addresses a+4n, b+4n, c+4n against the start
// addresses. If c overlaps a or c overlaps b, fall back to the scalar path
// with index 0.
LBB0_7:
	LONG $0x8a0c8d4c         // leaq	(%rdx,%rcx,4), %r9
	LONG $0x8f048d48         // leaq	(%rdi,%rcx,4), %rax
	WORD $0x3948; BYTE $0xd0 // cmpq	%rdx, %rax
	LONG $0xc2970f41         // seta	%r10b
	LONG $0x8e048d48         // leaq	(%rsi,%rcx,4), %rax
	WORD $0x3949; BYTE $0xf9 // cmpq	%rdi, %r9
	LONG $0xc3970f41         // seta	%r11b
	WORD $0x3948; BYTE $0xd0 // cmpq	%rdx, %rax
	LONG $0xc0970f41         // seta	%r8b
	WORD $0x3949; BYTE $0xf1 // cmpq	%rsi, %r9
	LONG $0xc1970f41         // seta	%r9b
	WORD $0xc031             // xorl	%eax, %eax
	WORD $0x8445; BYTE $0xda // testb	%r11b, %r10b
	JNE  LBB0_3
	WORD $0x2045; BYTE $0xc8 // andb	%r9b, %r8b
	JNE  LBB0_3
	// rax = n rounded down to a multiple of 64 (vector loop bound); r8 = index.
	WORD $0x8948; BYTE $0xc8 // movq	%rcx, %rax
	LONG $0xc0e08348         // andq	$-64, %rax
	WORD $0x3145; BYTE $0xc0 // xorl	%r8d, %r8d

// Vector loop: four zmm registers per iteration, index advanced by 64 elements.
LBB0_10:
	LONG $0x487cb162; WORD $0x0410; BYTE $0x87 // vmovups	(%rdi,%r8,4), %zmm0
	QUAD $0x01874c10487cb162                   // vmovups	64(%rdi,%r8,4), %zmm1
	QUAD $0x02875410487cb162                   // vmovups	128(%rdi,%r8,4), %zmm2
	QUAD $0x03875c10487cb162                   // vmovups	192(%rdi,%r8,4), %zmm3
	LONG $0x487cb162; WORD $0x0459; BYTE $0x86 // vmulps	(%rsi,%r8,4), %zmm0, %zmm0
	QUAD $0x01864c594874b162                   // vmulps	64(%rsi,%r8,4), %zmm1, %zmm1
	QUAD $0x02865459486cb162                   // vmulps	128(%rsi,%r8,4), %zmm2, %zmm2
	QUAD $0x03865c594864b162                   // vmulps	192(%rsi,%r8,4), %zmm3, %zmm3
	LONG $0x487cb162; WORD $0x0411; BYTE $0x82 // vmovups	%zmm0, (%rdx,%r8,4)
	QUAD $0x01824c11487cb162                   // vmovups	%zmm1, 64(%rdx,%r8,4)
	QUAD $0x02825411487cb162                   // vmovups	%zmm2, 128(%rdx,%r8,4)
	QUAD $0x03825c11487cb162                   // vmovups	%zmm3, 192(%rdx,%r8,4)
	LONG $0x40c08349                           // addq	$64, %r8
	WORD $0x394c; BYTE $0xc0                   // cmpq	%r8, %rax
	JNE  LBB0_10
	// If the vector loop covered all n elements, finish; otherwise handle the
	// remaining elements with the scalar code below, starting at index rax.
	WORD $0x3948; BYTE $0xc8                   // cmpq	%rcx, %rax
	JE   LBB0_12

// Scalar tail setup: r8 = n - rax - 1, r9 = n & 3 (count for the one-at-a-time
// loop). When r9 is zero that loop is skipped.
LBB0_3:
	WORD $0x8949; BYTE $0xc0 // movq	%rax, %r8
	WORD $0xf749; BYTE $0xd0 // notq	%r8
	WORD $0x0149; BYTE $0xc8 // addq	%rcx, %r8
	WORD $0x8949; BYTE $0xc9 // movq	%rcx, %r9
	LONG $0x03e18349         // andq	$3, %r9
	JE   LBB0_5

// One element per iteration, repeated r9 times.
LBB0_4:
	LONG $0x0410fac5; BYTE $0x87 // vmovss	(%rdi,%rax,4), %xmm0
	LONG $0x0459fac5; BYTE $0x86 // vmulss	(%rsi,%rax,4), %xmm0, %xmm0
	LONG $0x0411fac5; BYTE $0x82 // vmovss	%xmm0, (%rdx,%rax,4)
	LONG $0x01c08348             // addq	$1, %rax
	LONG $0xffc18349             // addq	$-1, %r9
	JNE  LBB0_4

// Skip the unrolled loop when r8 (computed at LBB0_3) is below 3.
LBB0_5:
	LONG $0x03f88349 // cmpq	$3, %r8
	JB   LBB0_12

// Scalar loop unrolled four times; runs until the index reaches n.
LBB0_6:
	LONG $0x0410fac5; BYTE $0x87   // vmovss	(%rdi,%rax,4), %xmm0
	LONG $0x0459fac5; BYTE $0x86   // vmulss	(%rsi,%rax,4), %xmm0, %xmm0
	LONG $0x0411fac5; BYTE $0x82   // vmovss	%xmm0, (%rdx,%rax,4)
	LONG $0x4410fac5; WORD $0x0487 // vmovss	4(%rdi,%rax,4), %xmm0
	LONG $0x4459fac5; WORD $0x0486 // vmulss	4(%rsi,%rax,4), %xmm0, %xmm0
	LONG $0x4411fac5; WORD $0x0482 // vmovss	%xmm0, 4(%rdx,%rax,4)
	LONG $0x4410fac5; WORD $0x0887 // vmovss	8(%rdi,%rax,4), %xmm0
	LONG $0x4459fac5; WORD $0x0886 // vmulss	8(%rsi,%rax,4), %xmm0, %xmm0
	LONG $0x4411fac5; WORD $0x0882 // vmovss	%xmm0, 8(%rdx,%rax,4)
	LONG $0x4410fac5; WORD $0x0c87 // vmovss	12(%rdi,%rax,4), %xmm0
	LONG $0x4459fac5; WORD $0x0c86 // vmulss	12(%rsi,%rax,4), %xmm0, %xmm0
	LONG $0x4411fac5; WORD $0x0c82 // vmovss	%xmm0, 12(%rdx,%rax,4)
	LONG $0x04c08348               // addq	$4, %rax
	WORD $0x3948; BYTE $0xc1       // cmpq	%rax, %rcx
	JNE  LBB0_6

// Epilogue: restore the stack pointer and rbp saved in the prologue, then return.
LBB0_12:
	WORD $0x8948; BYTE $0xec // movq	%rbp, %rsp
	BYTE $0x5d               // popq	%rbp
	WORD $0xf8c5; BYTE $0x77 // vzeroupper
	BYTE $0xc3               // retq
Finally, the mul_to function can be called by:

func MulTo(a, b, c []float32) {
	if len(a) ! = len(b) || len(a) ! = len(c) {
		panic("floats: slice lengths do not match")
	}
	mul_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
}

Limitations

  • Arguments must (for now) be 64 bits in size, i.e. either a 64-bit value or a pointer
  • At most 4 arguments are supported
  • Generally no call statements

Acknowledgments

GoAT is inspired by c2goasm.

Documentation

Overview

Copyright 2022 gorse Project Authors

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL