sse4

package

v0.0.0-...-3878f85 Latest Latest Go to latest Published: Jul 23, 2017 License: MIT Imports: 1 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/klauspost/intrinsics

Links

Open Source Insights

Documentation ¶

Overview ¶

THESE PACKAGES ARE FOR DEMONSTRATION PURPOSES ONLY!

THEY DO NOT CONTAIN WORKING INTRINSICS!

See https://github.com/klauspost/intrinsics

Index ¶

Constants
func BlendEpi16(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)
func BlendPd(a x86.M128d, b x86.M128d, imm8 byte) (dst x86.M128d)
func BlendPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)
func BlendvEpi8(a x86.M128i, b x86.M128i, mask x86.M128i) (dst x86.M128i)
func BlendvPd(a x86.M128d, b x86.M128d, mask x86.M128d) (dst x86.M128d)
func BlendvPs(a x86.M128, b x86.M128, mask x86.M128) (dst x86.M128)
func CeilPd(a x86.M128d) (dst x86.M128d)
func CeilPs(a x86.M128) (dst x86.M128)
func CeilSd(a x86.M128d, b x86.M128d) (dst x86.M128d)
func CeilSs(a x86.M128, b x86.M128) (dst x86.M128)
func CmpeqEpi64(a x86.M128i, b x86.M128i) (dst x86.M128i)
func Cmpestra(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func Cmpestrc(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func Cmpestri(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func Cmpestrm(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) (dst x86.M128i)
func Cmpestro(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func Cmpestrs(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func Cmpestrz(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int
func CmpgtEpi64(a x86.M128i, b x86.M128i) (dst x86.M128i)
func Cmpistra(a x86.M128i, b x86.M128i, imm8 byte) int
func Cmpistrc(a x86.M128i, b x86.M128i, imm8 byte) int
func Cmpistri(a x86.M128i, b x86.M128i, imm8 byte) int
func Cmpistrm(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)
func Cmpistro(a x86.M128i, b x86.M128i, imm8 byte) int
func Cmpistrs(a x86.M128i, b x86.M128i, imm8 byte) int
func Cmpistrz(a x86.M128i, b x86.M128i, imm8 byte) int
func Crc32U16(crc uint32, v uint16) uint32
func Crc32U32(crc uint32, v uint32) uint32
func Crc32U64(crc uint64, v uint64) uint64
func Crc32U8(crc uint32, v uint8) uint32
func Cvtepi16Epi32(a x86.M128i) (dst x86.M128i)
func Cvtepi16Epi64(a x86.M128i) (dst x86.M128i)
func Cvtepi32Epi64(a x86.M128i) (dst x86.M128i)
func Cvtepi8Epi16(a x86.M128i) (dst x86.M128i)
func Cvtepi8Epi32(a x86.M128i) (dst x86.M128i)
func Cvtepi8Epi64(a x86.M128i) (dst x86.M128i)
func Cvtepu16Epi32(a x86.M128i) (dst x86.M128i)
func Cvtepu16Epi64(a x86.M128i) (dst x86.M128i)
func Cvtepu32Epi64(a x86.M128i) (dst x86.M128i)
func Cvtepu8Epi16(a x86.M128i) (dst x86.M128i)
func Cvtepu8Epi32(a x86.M128i) (dst x86.M128i)
func Cvtepu8Epi64(a x86.M128i) (dst x86.M128i)
func DpPd(a x86.M128d, b x86.M128d, imm8 byte) (dst x86.M128d)
func DpPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)
func ExtractEpi32(a x86.M128i, imm8 byte) int
func ExtractEpi64(a x86.M128i, imm8 byte) int64
func ExtractEpi8(a x86.M128i, imm8 byte) int
func ExtractPs(a x86.M128, imm8 byte) int
func FloorPd(a x86.M128d) (dst x86.M128d)
func FloorPs(a x86.M128) (dst x86.M128)
func FloorSd(a x86.M128d, b x86.M128d) (dst x86.M128d)
func FloorSs(a x86.M128, b x86.M128) (dst x86.M128)
func InsertEpi32(a x86.M128i, i int, imm8 byte) (dst x86.M128i)
func InsertEpi64(a x86.M128i, i int64, imm8 byte) (dst x86.M128i)
func InsertEpi8(a x86.M128i, i int, imm8 byte) (dst x86.M128i)
func InsertPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)
func MaxEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MaxEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MaxEpu16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MaxEpu32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MinEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MinEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MinEpu16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MinEpu32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MinposEpu16(a x86.M128i) (dst x86.M128i)
func MpsadbwEpu8(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)
func MulEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MulloEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func PackusEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func RoundPd(a x86.M128d, rounding int) (dst x86.M128d)
func RoundPs(a x86.M128, rounding int) (dst x86.M128)
func RoundSd(a x86.M128d, b x86.M128d, rounding int) (dst x86.M128d)
func RoundSs(a x86.M128, b x86.M128, rounding int) (dst x86.M128)
func StreamLoadSi128(mem_addr *x86.M128i) (dst x86.M128i)
func TestAllOnes(a x86.M128i) int
func TestAllZeros(a x86.M128i, mask x86.M128i) int
func TestMixOnesZeros(a x86.M128i, mask x86.M128i) int
func TestcSi128(a x86.M128i, b x86.M128i) int
func TestnzcSi128(a x86.M128i, b x86.M128i) int
func TestzSi128(a x86.M128i, b x86.M128i) int

Constants ¶

View Source

const BIT_MASK = 0x00

These macros are used in CmpXstri() to specify the return.

View Source

const CMP_EQUAL_ANY = 0x00 // For each character c in a, determine whether any character in b is equal to c. The first operand is a character set, the second is a string (think of strspn or strcspn).

These specify the type of comparison operation.

View Source

const CMP_EQUAL_EACH = 0x08 // This implements the string equality algorithm. This operation compares two strings (think of strcmp or memcmp). The result of comparison is a bit mask (1 if the corresponding bytes are equal, 0 if not equal).

View Source

const CMP_EQUAL_ORDERED = 0x0c // This implements the substring search algorithm. The first operand contains a string to search for, the second is a string to search in. The bit mask includes 1 if the substring is found at the corresponding position.

View Source

const CMP_RANGES = 0x04 // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3… The first operand consists of ranges, for example, "azAZ" means "all characters from a to z and all characters from A to Z".

View Source

const LEAST_SIGNIFICANT = 0x00 //  The index of the rightmost bit set to 1 is returned.

These macros are used in CmpXstri() to specify the return.

View Source

const MASKED_NEGATIVE_POLARITY = 0x30 // Negation of resulting bitmask except for bits that have an index larger than the size of a or b (see details of pcmpestri instruction).

View Source

const MASKED_POSITIVE_POLARITY = 0x20 //

View Source

const MOST_SIGNIFICANT = 0x40 // The index of the leftmost bit set to 1 is returned.

View Source

const NEGATIVE_POLARITY = 0x10 // Negation of resulting bitmask.

View Source

const POSITIVE_POLARITY = 0x00 // No effect

These macros specify the polarity of the operation.

View Source

const SBYTE_OPS = 0x02 // a and b contain strings of signed 8-bit characters.

View Source

const SWORD_OPS = 0x03 // a and b contain strings of signed 16-bit characters.

View Source

const UBYTE_OPS = 0x00 // a and b contain strings of unsigned 8-bit characters.

These specify the type of data that we're comparing.

View Source

const UNIT_MASK = 0x40

View Source

const UWORD_OPS = 0x01 // a and b contain strings of unsigned 16-bit characters.

Variables ¶

This section is empty.

Functions ¶

func BlendEpi16 ¶

func BlendEpi16(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)

BlendEpi16: Blend packed 16-bit integers from 'a' and 'b' using control mask 'imm8', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*16
	IF imm8[j%8]
		dst[i+15:i] := b[i+15:i]
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR

Instruction: 'PBLENDW'. Intrinsic: '_mm_blend_epi16'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func BlendPd ¶

func BlendPd(a x86.M128d, b x86.M128d, imm8 byte) (dst x86.M128d)

BlendPd: Blend packed double-precision (64-bit) floating-point elements from 'a' and 'b' using control mask 'imm8', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	IF imm8[j%8]
		dst[i+63:i] := b[i+63:i]
	ELSE
		dst[i+63:i] := a[i+63:i]
	FI
ENDFOR

Instruction: 'BLENDPD'. Intrinsic: '_mm_blend_pd'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func BlendPs ¶

func BlendPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)

BlendPs: Blend packed single-precision (32-bit) floating-point elements from 'a' and 'b' using control mask 'imm8', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF imm8[j%8]
		dst[i+31:i] := b[i+31:i]
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR

Instruction: 'BLENDPS'. Intrinsic: '_mm_blend_ps'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func BlendvEpi8 ¶

func BlendvEpi8(a x86.M128i, b x86.M128i, mask x86.M128i) (dst x86.M128i)

BlendvEpi8: Blend packed 8-bit integers from 'a' and 'b' using 'mask', and store the results in 'dst'.

FOR j := 0 to 15
	i := j*8
	IF mask[i+7]
		dst[i+7:i] := b[i+7:i]
	ELSE
		dst[i+7:i] := a[i+7:i]
	FI
ENDFOR

Instruction: 'PBLENDVB'. Intrinsic: '_mm_blendv_epi8'. Requires SSE4.1.

func BlendvPd ¶

func BlendvPd(a x86.M128d, b x86.M128d, mask x86.M128d) (dst x86.M128d)

BlendvPd: Blend packed double-precision (64-bit) floating-point elements from 'a' and 'b' using 'mask', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	IF mask[i+63]
		dst[i+63:i] := b[i+63:i]
	ELSE
		dst[i+63:i] := a[i+63:i]
	FI
ENDFOR

Instruction: 'BLENDVPD'. Intrinsic: '_mm_blendv_pd'. Requires SSE4.1.

func BlendvPs ¶

func BlendvPs(a x86.M128, b x86.M128, mask x86.M128) (dst x86.M128)

BlendvPs: Blend packed single-precision (32-bit) floating-point elements from 'a' and 'b' using 'mask', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF mask[i+31]
		dst[i+31:i] := b[i+31:i]
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR

Instruction: 'BLENDVPS'. Intrinsic: '_mm_blendv_ps'. Requires SSE4.1.

func CeilPd ¶

func CeilPd(a x86.M128d) (dst x86.M128d)

CeilPd: Round the packed double-precision (64-bit) floating-point elements in 'a' up to an integer value, and store the results as packed double-precision floating-point elements in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := CEIL(a[i+63:i])
ENDFOR

Instruction: 'ROUNDPD'. Intrinsic: '_mm_ceil_pd'. Requires SSE4.1.

func CeilPs ¶

func CeilPs(a x86.M128) (dst x86.M128)

CeilPs: Round the packed single-precision (32-bit) floating-point elements in 'a' up to an integer value, and store the results as packed single-precision floating-point elements in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := CEIL(a[i+31:i])
ENDFOR

Instruction: 'ROUNDPS'. Intrinsic: '_mm_ceil_ps'. Requires SSE4.1.

func CeilSd ¶

func CeilSd(a x86.M128d, b x86.M128d) (dst x86.M128d)

CeilSd: Round the lower double-precision (64-bit) floating-point element in 'b' up to an integer value, store the result as a double-precision floating-point element in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := CEIL(b[63:0])
dst[127:64] := a[127:64]

Instruction: 'ROUNDSD'. Intrinsic: '_mm_ceil_sd'. Requires SSE4.1.

func CeilSs ¶

func CeilSs(a x86.M128, b x86.M128) (dst x86.M128)

CeilSs: Round the lower single-precision (32-bit) floating-point element in 'b' up to an integer value, store the result as a single-precision floating-point element in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := CEIL(b[31:0])
dst[127:32] := a[127:32]

Instruction: 'ROUNDSS'. Intrinsic: '_mm_ceil_ss'. Requires SSE4.1.

func CmpeqEpi64 ¶

func CmpeqEpi64(a x86.M128i, b x86.M128i) (dst x86.M128i)

CmpeqEpi64: Compare packed 64-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPEQQ'. Intrinsic: '_mm_cmpeq_epi64'. Requires SSE4.1.

func Cmpestra ¶

func Cmpestra(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestra: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and returns 1 if 'b' did not contain a null character and the resulting mask was zero, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF i == la
					aInvalid := 1
				FI
				IF j == lb
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
					0:  // equal any
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					1:  // ranges
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					2:  // equal each
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
					3:  // equal ordered
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 1
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
			0:  // equal any
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound
						IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
					ENDFOR
				ENDFOR
			1:  // ranges
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound, j += 2
						IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
					ENDFOR
				ENDFOR
			2:  // equal each
				IntRes1 := 0
				FOR i := 0 to UpperBound
					IntRes1[i] := BoolRes[i][i]
				ENDFOR
			3:  // equal ordered
				IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
				FOR i := 0 to UpperBound
					k := i
					FOR j := 0 to UpperBound-i
						IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
						k++
					ENDFOR
				ENDFOR
		ESAC

		// optionally negate results
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					IF i >= lb // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := (IntRes2 == 0) AND (lb > UpperBound)

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestra'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestrc ¶

func Cmpestrc(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestrc: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and returns 1 if the resulting mask was non-zero, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF i == la
					aInvalid := 1
				FI
				IF j == lb
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
					0:  // equal any
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					1:  // ranges
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					2:  // equal each
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
					3:  // equal ordered
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 1
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
			0:  // equal any
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound
						IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
					ENDFOR
				ENDFOR
			1:  // ranges
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound, j += 2
						IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
					ENDFOR
				ENDFOR
			2:  // equal each
				IntRes1 := 0
				FOR i := 0 to UpperBound
					IntRes1[i] := BoolRes[i][i]
				ENDFOR
			3:  // equal ordered
				IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
				FOR i := 0 to UpperBound
					k := i
					FOR j := 0 to UpperBound-i
						IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
						k++
					ENDFOR
				ENDFOR
		ESAC

		// optionally negate results
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					IF i >= lb // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := (IntRes2 != 0)

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestrc'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestri ¶

func Cmpestri(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestri: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and store the generated index in 'dst'.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF i == la
					aInvalid := 1
				FI
				IF j == lb
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					IF i >= lb // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		IF imm8[6] // most significant bit
			tmp := UpperBound
			dst := tmp
			DO WHILE ((tmp >= 0) AND IntRes2[tmp] = 0)
				tmp := tmp - 1
				dst := tmp
			OD
		ELSE // least significant bit
			tmp := 0
			dst := tmp
			DO WHILE ((tmp <= UpperBound) AND IntRes2[tmp] = 0)
				tmp := tmp + 1
				dst := tmp
			OD
		FI

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestri'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestrm ¶

func Cmpestrm(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) (dst x86.M128i)

Cmpestrm: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and store the generated mask in 'dst'.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF i == la
					aInvalid := 1
				FI
				IF j == lb
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					IF i >= lb // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		IF imm8[6] // byte / word mask
			FOR i := 0 to UpperBound
				j := i*size
				IF IntRes2[i]
					dst[j+size-1:j] := (imm8[0] ? 0xFFFF : 0xFF)
				ELSE
					dst[j+size-1:j] := 0
				FI
			ENDFOR
		ELSE // bit mask
			dst[UpperBound:0] := IntRes2[UpperBound:0]
			dst[127:UpperBound+1] := 0
		FI

Instruction: 'PCMPESTRM'. Intrinsic: '_mm_cmpestrm'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestro ¶

func Cmpestro(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestro: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and returns bit 0 of the resulting bit mask.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF i == la
					aInvalid := 1
				FI
				IF j == lb
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
					0:  // equal any
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					1:  // ranges
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 0
						FI
					2:  // equal each
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 0
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
					3:  // equal ordered
						IF (!aInvalid && bInvalid)
							BoolRes[i][j] := 0
						ELSE IF (aInvalid && !bInvalid)
							BoolRes[i][j] := 1
						ELSE If (aInvalid && bInvalid)
							BoolRes[i][j] := 1
						FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
			0:  // equal any
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound
						IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
					ENDFOR
				ENDFOR
			1:  // ranges
				IntRes1 := 0
				FOR i := 0 to UpperBound
					FOR j := 0 to UpperBound, j += 2
						IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
					ENDFOR
				ENDFOR
			2:  // equal each
				IntRes1 := 0
				FOR i := 0 to UpperBound
					IntRes1[i] := BoolRes[i][i]
				ENDFOR
			3:  // equal ordered
				IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
				FOR i := 0 to UpperBound
					k := i
					FOR j := 0 to UpperBound-i
						IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
						k++
					ENDFOR
				ENDFOR
		ESAC

		// optionally negate results
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					IF i >= lb // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := IntRes2[0]

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestro'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestrs ¶

func Cmpestrs(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestrs: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and returns 1 if any character in 'a' was null, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		dst := (la <= UpperBound)

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestrs'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpestrz ¶

func Cmpestrz(a x86.M128i, la int, b x86.M128i, lb int, imm8 byte) int

Cmpestrz: Compare packed strings in 'a' and 'b' with lengths 'la' and 'lb' using the control in 'imm8', and returns 1 if any character in 'b' was null, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		dst := (lb <= UpperBound)

Instruction: 'PCMPESTRI'. Intrinsic: '_mm_cmpestrz'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func CmpgtEpi64 ¶

func CmpgtEpi64(a x86.M128i, b x86.M128i) (dst x86.M128i)

CmpgtEpi64: Compare packed 64-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPGTQ'. Intrinsic: '_mm_cmpgt_epi64'. Requires SSE4.2.

func Cmpistra ¶

func Cmpistra(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistra: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and returns 1 if 'b' did not contain a null character and the resulting mask was zero, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF a[m+size-1:m] == 0
					aInvalid := 1
				FI
				IF b[n+size-1:n] == 0
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		bInvalid := 0
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					n := i*size
					IF b[n+size-1:n] == 0
						bInvalid := 1
					FI
					IF bInvalid // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := (IntRes2 == 0) AND (NOT bInvalid)

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistra'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistrc ¶

func Cmpistrc(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistrc: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and returns 1 if the resulting mask was non-zero, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF a[m+size-1:m] == 0
					aInvalid := 1
				FI
				IF b[n+size-1:n] == 0
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		bInvalid := 0
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					n := i*size
					IF b[n+size-1:n] == 0
						bInvalid := 1
					FI
					IF bInvalid // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := (IntRes2 != 0)

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistrc'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistri ¶

func Cmpistri(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistri: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and store the generated index in 'dst'.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF a[m+size-1:m] == 0
					aInvalid := 1
				FI
				IF b[n+size-1:n] == 0
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		bInvalid := 0
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					n := i*size
					IF b[n+size-1:n] == 0
						bInvalid := 1
					FI
					IF bInvalid // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		IF imm8[6] // most significant bit
			tmp := UpperBound
			dst := tmp
			DO WHILE ((tmp >= 0) AND IntRes2[tmp] = 0)
				tmp := tmp - 1
				dst := tmp
			OD
		ELSE // least significant bit
			tmp := 0
			dst := tmp
			DO WHILE ((tmp <= UpperBound) AND IntRes2[tmp] = 0)
				tmp := tmp + 1
				dst := tmp
			OD
		FI

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistri'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistrm ¶

func Cmpistrm(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)

Cmpistrm: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and store the generated mask in 'dst'.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF a[m+size-1:m] == 0
					aInvalid := 1
				FI
				IF b[n+size-1:n] == 0
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		bInvalid := 0
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					n := i*size
					IF b[n+size-1:n] == 0
						bInvalid := 1
					FI
					IF bInvalid // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		IF imm8[6] // byte / word mask
			FOR i := 0 to UpperBound
				j := i*size
				IF IntRes2[i]
					dst[j+size-1:j] := (imm8[0] ? 0xFFFF : 0xFF)
				ELSE
					dst[j+size-1:j] := 0
				FI
			ENDFOR
		ELSE // bit mask
			dst[UpperBound:0] := IntRes2[UpperBound:0]
			dst[127:UpperBound+1] := 0
		FI

Instruction: 'PCMPISTRM'. Intrinsic: '_mm_cmpistrm'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistro ¶

func Cmpistro(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistro: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and returns bit 0 of the resulting bit mask.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		// compare all characters
		aInvalid := 0
		bInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			FOR j := 0 to UpperBound
				n := j*size
				BoolRes[i][j] := (a[m+size-1:m] == b[n+size-1:n])

				// invalidate characters after EOS
				IF a[m+size-1:m] == 0
					aInvalid := 1
				FI
				IF b[n+size-1:n] == 0
					bInvalid := 1
				FI

				// override comparisons for invalid characters
				CASE (imm8[3:2]) OF
				0:  // equal any
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				1:  // ranges
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 0
					FI
				2:  // equal each
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 0
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				3:  // equal ordered
					IF (!aInvalid && bInvalid)
						BoolRes[i][j] := 0
					ELSE IF (aInvalid && !bInvalid)
						BoolRes[i][j] := 1
					ELSE If (aInvalid && bInvalid)
						BoolRes[i][j] := 1
					FI
				ESAC
			ENDFOR
		ENDFOR

		// aggregate results
		CASE (imm8[3:2]) OF
		0:  // equal any
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound
					IntRes1[i] := IntRes1[i] OR BoolRes[i][j]
				ENDFOR
			ENDFOR
		1:  // ranges
			IntRes1 := 0
			FOR i := 0 to UpperBound
				FOR j := 0 to UpperBound, j += 2
					IntRes1[i] := IntRes1[i] OR (BoolRes[i][j] AND BoolRes[i][j+1])
				ENDFOR
			ENDFOR
		2:  // equal each
			IntRes1 := 0
			FOR i := 0 to UpperBound
				IntRes1[i] := BoolRes[i][i]
			ENDFOR
		3:  // equal ordered
			IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
			FOR i := 0 to UpperBound
				k := i
				FOR j := 0 to UpperBound-i
					IntRes1[i] := IntRes1[i] AND BoolRes[k][j]
					k++
				ENDFOR
			ENDFOR
		ESAC

		// optionally negate results
		bInvalid := 0
		FOR i := 0 to UpperBound
			IF imm8[4]
				IF imm8[5] // only negate valid
					n := i*size
					IF b[n+size-1:n] == 0
						bInvalid := 1
					FI
					IF bInvalid // invalid, don't negate
						IntRes2[i] := IntRes1[i]
					ELSE // valid, negate
						IntRes2[i] := -1 XOR IntRes1[i]
					FI
				ELSE // negate all
					IntRes2[i] := -1 XOR IntRes1[i]
				FI
			ELSE // don't negate
				IntRes2[i] := IntRes1[i]
			FI
		ENDFOR

		// output
		dst := IntRes2[0]

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistro'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistrs ¶

func Cmpistrs(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistrs: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and returns 1 if any character in 'a' was null, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		aInvalid := 0
		FOR i := 0 to UpperBound
			m := i*size
			IF a[m+size-1:m] == 0
				aInvalid := 1
			FI
		ENDFOR

		dst := aInvalid

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistrs'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Cmpistrz ¶

func Cmpistrz(a x86.M128i, b x86.M128i, imm8 byte) int

Cmpistrz: Compare packed strings with implicit lengths in 'a' and 'b' using the control in 'imm8', and returns 1 if any character in 'b' was null, and 0 otherwise.

	'imm' can be a combination of:
    _SIDD_UBYTE_OPS                // unsigned 8-bit characters
    _SIDD_UWORD_OPS                // unsigned 16-bit characters
    _SIDD_SBYTE_OPS                // signed 8-bit characters
    _SIDD_SWORD_OPS                // signed 16-bit characters
    _SIDD_CMP_EQUAL_ANY            // compare equal any
    _SIDD_CMP_RANGES               // compare ranges
    _SIDD_CMP_EQUAL_EACH           // compare equal each
    _SIDD_CMP_EQUAL_ORDERED        // compare equal ordered
    _SIDD_NEGATIVE_POLARITY        // negate results
    _SIDD_MASKED_NEGATIVE_POLARITY // negate results only before end of string
    _SIDD_LEAST_SIGNIFICANT        // index only: return least significant bit
    _SIDD_MOST_SIGNIFICANT         // index only: return most significant bit
    _SIDD_BIT_MASK                 // mask only: return bit mask
    _SIDD_UNIT_MASK                // mask only: return byte/word mask

		size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
		UpperBound := (128 / size) - 1

		bInvalid := 0
		FOR j := 0 to UpperBound
			n := j*size
			IF b[n+size-1:n] == 0
				bInvalid := 1
			FI
		ENDFOR

		dst := bInvalid

Instruction: 'PCMPISTRI'. Intrinsic: '_mm_cmpistrz'. Requires SSE4.2.

FIXME: Requires compiler support (has immediate)

func Crc32U16 ¶

func Crc32U16(crc uint32, v uint16) uint32

Crc32U16: Starting with the initial value in 'crc', accumulates a CRC32 value for unsigned 16-bit integer 'v', and stores the result in 'dst'.

tmp1[15:0] := v[0:15] // bit reflection
tmp2[31:0] := crc[0:31] // bit reflection
tmp3[47:0] := tmp1[15:0] << 32
tmp4[47:0] := tmp2[31:0] << 16
tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0]
tmp6[31:0] := tmp5[47:0] MOD2 0x11EDC6F41
dst[31:0] := tmp6[0:31] // bit reflection

Instruction: 'CRC32'. Intrinsic: '_mm_crc32_u16'. Requires SSE4.2.

func Crc32U32 ¶

func Crc32U32(crc uint32, v uint32) uint32

Crc32U32: Starting with the initial value in 'crc', accumulates a CRC32 value for unsigned 32-bit integer 'v', and stores the result in 'dst'.

tmp1[31:0] := v[0:31] // bit reflection
tmp2[31:0] := crc[0:31] // bit reflection
tmp3[63:0] := tmp1[31:0] << 32
tmp4[63:0] := tmp2[31:0] << 32
tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0]
tmp6[31:0] := tmp5[63:0] MOD2 0x11EDC6F41
dst[31:0] := tmp6[0:31] // bit reflection

Instruction: 'CRC32'. Intrinsic: '_mm_crc32_u32'. Requires SSE4.2.

func Crc32U64 ¶

func Crc32U64(crc uint64, v uint64) uint64

Crc32U64: Starting with the initial value in 'crc', accumulates a CRC32 value for unsigned 64-bit integer 'v', and stores the result in 'dst'.

tmp1[63:0] := v[0:63] // bit reflection
tmp2[31:0] := crc[0:31] // bit reflection
tmp3[95:0] := tmp1[63:0] << 32
tmp4[95:0] := tmp2[31:0] << 64
tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0]
tmp6[31:0] := tmp5[95:0] MOD2 0x11EDC6F41
dst[31:0] := tmp6[0:31] // bit reflection

Instruction: 'CRC32'. Intrinsic: '_mm_crc32_u64'. Requires SSE4.2.

func Crc32U8 ¶

func Crc32U8(crc uint32, v uint8) uint32

Crc32U8: Starting with the initial value in 'crc', accumulates a CRC32 value for unsigned 8-bit integer 'v', and stores the result in 'dst'.

tmp1[7:0] := v[0:7] // bit reflection
tmp2[31:0] := crc[0:31] // bit reflection
tmp3[39:0] := tmp1[7:0] << 32
tmp4[39:0] := tmp2[31:0] << 8
tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0]
tmp6[31:0] := tmp5[39:0] MOD2 0x11EDC6F41
dst[31:0] := tmp6[0:31] // bit reflection

Instruction: 'CRC32'. Intrinsic: '_mm_crc32_u8'. Requires SSE4.2.

func Cvtepi16Epi32 ¶

func Cvtepi16Epi32(a x86.M128i) (dst x86.M128i)

Cvtepi16Epi32: Sign extend packed 16-bit integers in 'a' to packed 32-bit integers, and store the results in 'dst'.

FOR j := 0 to 3
	i := 32*j
	k := 16*j
	dst[i+31:i] := SignExtend(a[k+15:k])
ENDFOR

Instruction: 'PMOVSXWD'. Intrinsic: '_mm_cvtepi16_epi32'. Requires SSE4.1.

func Cvtepi16Epi64 ¶

func Cvtepi16Epi64(a x86.M128i) (dst x86.M128i)

Cvtepi16Epi64: Sign extend packed 16-bit integers in 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 16*j
	dst[i+63:i] := SignExtend(a[k+15:k])
ENDFOR

Instruction: 'PMOVSXWQ'. Intrinsic: '_mm_cvtepi16_epi64'. Requires SSE4.1.

func Cvtepi32Epi64 ¶

func Cvtepi32Epi64(a x86.M128i) (dst x86.M128i)

Cvtepi32Epi64: Sign extend packed 32-bit integers in 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 32*j
	dst[i+63:i] := SignExtend(a[k+31:k])
ENDFOR

Instruction: 'PMOVSXDQ'. Intrinsic: '_mm_cvtepi32_epi64'. Requires SSE4.1.

func Cvtepi8Epi16 ¶

func Cvtepi8Epi16(a x86.M128i) (dst x86.M128i)

Cvtepi8Epi16: Sign extend packed 8-bit integers in 'a' to packed 16-bit integers, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	l := j*16
	dst[l+15:l] := SignExtend(a[i+7:i])
ENDFOR

Instruction: 'PMOVSXBW'. Intrinsic: '_mm_cvtepi8_epi16'. Requires SSE4.1.

func Cvtepi8Epi32 ¶

func Cvtepi8Epi32(a x86.M128i) (dst x86.M128i)

Cvtepi8Epi32: Sign extend packed 8-bit integers in 'a' to packed 32-bit integers, and store the results in 'dst'.

FOR j := 0 to 3
	i := 32*j
	k := 8*j
	dst[i+31:i] := SignExtend(a[k+7:k])
ENDFOR

Instruction: 'PMOVSXBD'. Intrinsic: '_mm_cvtepi8_epi32'. Requires SSE4.1.

func Cvtepi8Epi64 ¶

func Cvtepi8Epi64(a x86.M128i) (dst x86.M128i)

Cvtepi8Epi64: Sign extend packed 8-bit integers in the low 2 bytes of 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 8*j
	dst[i+63:i] := SignExtend(a[k+7:k])
ENDFOR

Instruction: 'PMOVSXBQ'. Intrinsic: '_mm_cvtepi8_epi64'. Requires SSE4.1.

func Cvtepu16Epi32 ¶

func Cvtepu16Epi32(a x86.M128i) (dst x86.M128i)

Cvtepu16Epi32: Zero extend packed unsigned 16-bit integers in 'a' to packed 32-bit integers, and store the results in 'dst'.

FOR j := 0 to 3
	i := 32*j
	k := 16*j
	dst[i+31:i] := ZeroExtend(a[k+15:k])
ENDFOR

Instruction: 'PMOVZXWD'. Intrinsic: '_mm_cvtepu16_epi32'. Requires SSE4.1.

func Cvtepu16Epi64 ¶

func Cvtepu16Epi64(a x86.M128i) (dst x86.M128i)

Cvtepu16Epi64: Zero extend packed unsigned 16-bit integers in 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 16*j
	dst[i+63:i] := ZeroExtend(a[k+15:k])
ENDFOR

Instruction: 'PMOVZXWQ'. Intrinsic: '_mm_cvtepu16_epi64'. Requires SSE4.1.

func Cvtepu32Epi64 ¶

func Cvtepu32Epi64(a x86.M128i) (dst x86.M128i)

Cvtepu32Epi64: Zero extend packed unsigned 32-bit integers in 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 32*j
	dst[i+63:i] := ZeroExtend(a[k+31:k])
ENDFOR

Instruction: 'PMOVZXDQ'. Intrinsic: '_mm_cvtepu32_epi64'. Requires SSE4.1.

func Cvtepu8Epi16 ¶

func Cvtepu8Epi16(a x86.M128i) (dst x86.M128i)

Cvtepu8Epi16: Zero extend packed unsigned 8-bit integers in 'a' to packed 16-bit integers, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	l := j*16
	dst[l+15:l] := ZeroExtend(a[i+7:i])
ENDFOR

Instruction: 'PMOVZXBW'. Intrinsic: '_mm_cvtepu8_epi16'. Requires SSE4.1.

func Cvtepu8Epi32 ¶

func Cvtepu8Epi32(a x86.M128i) (dst x86.M128i)

Cvtepu8Epi32: Zero extend packed unsigned 8-bit integers in 'a' to packed 32-bit integers, and store the results in 'dst'.

FOR j := 0 to 3
	i := 32*j
	k := 8*j
	dst[i+31:i] := ZeroExtend(a[k+7:k])
ENDFOR

Instruction: 'PMOVZXBD'. Intrinsic: '_mm_cvtepu8_epi32'. Requires SSE4.1.

func Cvtepu8Epi64 ¶

func Cvtepu8Epi64(a x86.M128i) (dst x86.M128i)

Cvtepu8Epi64: Zero extend packed unsigned 8-bit integers in the low 8 bytes of 'a' to packed 64-bit integers, and store the results in 'dst'.

FOR j := 0 to 1
	i := 64*j
	k := 8*j
	dst[i+63:i] := ZeroExtend(a[k+7:k])
ENDFOR

Instruction: 'PMOVZXBQ'. Intrinsic: '_mm_cvtepu8_epi64'. Requires SSE4.1.

func DpPd ¶

func DpPd(a x86.M128d, b x86.M128d, imm8 byte) (dst x86.M128d)

DpPd: Conditionally multiply the packed double-precision (64-bit) floating-point elements in 'a' and 'b' using the high 4 bits in 'imm8', sum the two products, and conditionally store the sum in 'dst' using the low 4 bits of 'imm8'.

DP(a[127:0], b[127:0], imm8[7:0]) {
	FOR j := 0 to 1
		i := j*64
		IF imm8[(4+j)%8]
			temp[i+63:i] := a[i+63:i] * b[i+63:i]
		ELSE
			temp[i+63:i] := 0
		FI
	ENDFOR

	sum[63:0] := temp[127:64] + temp[63:0]

	FOR j := 0 to 1
		i := j*64
		IF imm8[j%8]
			tmpdst[i+63:i] := sum[63:0]
		ELSE
			tmpdst[i+63:i] := 0
		FI
	ENDFOR
	RETURN tmpdst[127:0]
}

dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0])

Instruction: 'DPPD'. Intrinsic: '_mm_dp_pd'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func DpPs ¶

func DpPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)

DpPs: Conditionally multiply the packed single-precision (32-bit) floating-point elements in 'a' and 'b' using the high 4 bits in 'imm8', sum the four products, and conditionally store the sum in 'dst' using the low 4 bits of 'imm8'.

DP(a[127:0], b[127:0], imm8[7:0]) {
	FOR j := 0 to 3
		i := j*32
		IF imm8[(4+j)%8]
			temp[i+31:i] := a[i+31:i] * b[i+31:i]
		ELSE
			temp[i+31:i] := 0
		FI
	ENDFOR

	sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0])

	FOR j := 0 to 3
		i := j*32
		IF imm8[j%8]
			tmpdst[i+31:i] := sum[31:0]
		ELSE
			tmpdst[i+31:i] := 0
		FI
	ENDFOR
	RETURN tmpdst[127:0]
}

dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0])

Instruction: 'DPPS'. Intrinsic: '_mm_dp_ps'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func ExtractEpi32 ¶

func ExtractEpi32(a x86.M128i, imm8 byte) int

ExtractEpi32: Extract a 32-bit integer from 'a', selected with 'imm8', and store the result in 'dst'.

dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0]

Instruction: 'PEXTRD'. Intrinsic: '_mm_extract_epi32'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func ExtractEpi64 ¶

func ExtractEpi64(a x86.M128i, imm8 byte) int64

ExtractEpi64: Extract a 64-bit integer from 'a', selected with 'imm8', and store the result in 'dst'.

dst[63:0] := (a[127:0] >> (imm8[0] * 64))[63:0]

Instruction: 'PEXTRQ'. Intrinsic: '_mm_extract_epi64'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func ExtractEpi8 ¶

func ExtractEpi8(a x86.M128i, imm8 byte) int

ExtractEpi8: Extract an 8-bit integer from 'a', selected with 'imm8', and store the result in the lower element of 'dst'.

dst[7:0] := (a[127:0] >> (imm8[3:0] * 8))[7:0]
dst[31:8] := 0

Instruction: 'PEXTRB'. Intrinsic: '_mm_extract_epi8'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func ExtractPs ¶

func ExtractPs(a x86.M128, imm8 byte) int

ExtractPs: Extract a single-precision (32-bit) floating-point element from 'a', selected with 'imm8', and store the result in 'dst'.

dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0]

Instruction: 'EXTRACTPS'. Intrinsic: '_mm_extract_ps'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func FloorPd ¶

func FloorPd(a x86.M128d) (dst x86.M128d)

FloorPd: Round the packed double-precision (64-bit) floating-point elements in 'a' down to an integer value, and store the results as packed double-precision floating-point elements in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := FLOOR(a[i+63:i])
ENDFOR

Instruction: 'ROUNDPD'. Intrinsic: '_mm_floor_pd'. Requires SSE4.1.

func FloorPs ¶

func FloorPs(a x86.M128) (dst x86.M128)

FloorPs: Round the packed single-precision (32-bit) floating-point elements in 'a' down to an integer value, and store the results as packed single-precision floating-point elements in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := FLOOR(a[i+31:i])
ENDFOR

Instruction: 'ROUNDPS'. Intrinsic: '_mm_floor_ps'. Requires SSE4.1.

func FloorSd ¶

func FloorSd(a x86.M128d, b x86.M128d) (dst x86.M128d)

FloorSd: Round the lower double-precision (64-bit) floating-point element in 'b' down to an integer value, store the result as a double-precision floating-point element in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := FLOOR(b[63:0])
dst[127:64] := a[127:64]

Instruction: 'ROUNDSD'. Intrinsic: '_mm_floor_sd'. Requires SSE4.1.

func FloorSs ¶

func FloorSs(a x86.M128, b x86.M128) (dst x86.M128)

FloorSs: Round the lower single-precision (32-bit) floating-point element in 'b' down to an integer value, store the result as a single-precision floating-point element in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := FLOOR(b[31:0])
dst[127:32] := a[127:32]

Instruction: 'ROUNDSS'. Intrinsic: '_mm_floor_ss'. Requires SSE4.1.

func InsertEpi32 ¶

func InsertEpi32(a x86.M128i, i int, imm8 byte) (dst x86.M128i)

InsertEpi32: Copy 'a' to 'dst', and insert the 32-bit integer 'i' into 'dst' at the location specified by 'imm8'.

dst[127:0] := a[127:0]
sel := imm8[1:0]*32
dst[sel+31:sel] := i[31:0]

Instruction: 'PINSRD'. Intrinsic: '_mm_insert_epi32'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func InsertEpi64 ¶

func InsertEpi64(a x86.M128i, i int64, imm8 byte) (dst x86.M128i)

InsertEpi64: Copy 'a' to 'dst', and insert the 64-bit integer 'i' into 'dst' at the location specified by 'imm8'.

dst[127:0] := a[127:0]
sel := imm8[0]*64
dst[sel+63:sel] := i[63:0]

Instruction: 'PINSRQ'. Intrinsic: '_mm_insert_epi64'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func InsertEpi8 ¶

func InsertEpi8(a x86.M128i, i int, imm8 byte) (dst x86.M128i)

InsertEpi8: Copy 'a' to 'dst', and insert the lower 8-bit integer from 'i' into 'dst' at the location specified by 'imm8'.

dst[127:0] := a[127:0]
sel := imm8[3:0]*8
dst[sel+7:sel] := i[7:0]

Instruction: 'PINSRB'. Intrinsic: '_mm_insert_epi8'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func InsertPs ¶

func InsertPs(a x86.M128, b x86.M128, imm8 byte) (dst x86.M128)

InsertPs: Copy 'a' to 'tmp', then insert a single-precision (32-bit) floating-point element from 'b' into 'tmp' using the control in 'imm8'. Store 'tmp' to 'dst' using the mask in 'imm8' (elements are zeroed out when the corresponding bit is set).

tmp2[127:0] := a[127:0]
CASE (imm8[7:6]) of
0: tmp1[31:0] := b[31:0]
1: tmp1[31:0] := b[63:32]
2: tmp1[31:0] := b[95:64]
3: tmp1[31:0] := b[127:96]
ESAC
CASE (imm8[5:4]) of
0: tmp2[31:0] := tmp1[31:0]
1: tmp2[63:32] := tmp1[31:0]
2: tmp2[95:64] := tmp1[31:0]
3: tmp2[127:96] := tmp1[31:0]
ESAC
FOR j := 0 to 3
	i := j*32
	IF imm8[j%8]
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := tmp2[i+31:i]
	FI
ENDFOR

Instruction: 'INSERTPS'. Intrinsic: '_mm_insert_ps'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func MaxEpi32 ¶

func MaxEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MaxEpi32: Compare packed 32-bit integers in 'a' and 'b', and store packed maximum values in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF a[i+31:i] > b[i+31:i]
		dst[i+31:i] := a[i+31:i]
	ELSE
		dst[i+31:i] := b[i+31:i]
	FI
ENDFOR

Instruction: 'PMAXSD'. Intrinsic: '_mm_max_epi32'. Requires SSE4.1.

func MaxEpi8 ¶

func MaxEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)

MaxEpi8: Compare packed 8-bit integers in 'a' and 'b', and store packed maximum values in 'dst'.

FOR j := 0 to 15
	i := j*8
	IF a[i+7:i] > b[i+7:i]
		dst[i+7:i] := a[i+7:i]
	ELSE
		dst[i+7:i] := b[i+7:i]
	FI
ENDFOR

Instruction: 'PMAXSB'. Intrinsic: '_mm_max_epi8'. Requires SSE4.1.

func MaxEpu16 ¶

func MaxEpu16(a x86.M128i, b x86.M128i) (dst x86.M128i)

MaxEpu16: Compare packed unsigned 16-bit integers in 'a' and 'b', and store packed maximum values in 'dst'.

FOR j := 0 to 7
	i := j*16
	IF a[i+15:i] > b[i+15:i]
		dst[i+15:i] := a[i+15:i]
	ELSE
		dst[i+15:i] := b[i+15:i]
	FI
ENDFOR

Instruction: 'PMAXUW'. Intrinsic: '_mm_max_epu16'. Requires SSE4.1.

func MaxEpu32 ¶

func MaxEpu32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MaxEpu32: Compare packed unsigned 32-bit integers in 'a' and 'b', and store packed maximum values in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF a[i+31:i] > b[i+31:i]
		dst[i+31:i] := a[i+31:i]
	ELSE
		dst[i+31:i] := b[i+31:i]
	FI
ENDFOR

Instruction: 'PMAXUD'. Intrinsic: '_mm_max_epu32'. Requires SSE4.1.

func MinEpi32 ¶

func MinEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MinEpi32: Compare packed 32-bit integers in 'a' and 'b', and store packed minimum values in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF a[i+31:i] < b[i+31:i]
		dst[i+31:i] := a[i+31:i]
	ELSE
		dst[i+31:i] := b[i+31:i]
	FI
ENDFOR

Instruction: 'PMINSD'. Intrinsic: '_mm_min_epi32'. Requires SSE4.1.

func MinEpi8 ¶

func MinEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)

MinEpi8: Compare packed 8-bit integers in 'a' and 'b', and store packed minimum values in 'dst'.

FOR j := 0 to 15
	i := j*8
	IF a[i+7:i] < b[i+7:i]
		dst[i+7:i] := a[i+7:i]
	ELSE
		dst[i+7:i] := b[i+7:i]
	FI
ENDFOR

Instruction: 'PMINSB'. Intrinsic: '_mm_min_epi8'. Requires SSE4.1.

func MinEpu16 ¶

func MinEpu16(a x86.M128i, b x86.M128i) (dst x86.M128i)

MinEpu16: Compare packed unsigned 16-bit integers in 'a' and 'b', and store packed minimum values in 'dst'.

FOR j := 0 to 7
	i := j*16
	IF a[i+15:i] < b[i+15:i]
		dst[i+15:i] := a[i+15:i]
	ELSE
		dst[i+15:i] := b[i+15:i]
	FI
ENDFOR

Instruction: 'PMINUW'. Intrinsic: '_mm_min_epu16'. Requires SSE4.1.

func MinEpu32 ¶

func MinEpu32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MinEpu32: Compare packed unsigned 32-bit integers in 'a' and 'b', and store packed minimum values in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF a[i+31:i] < b[i+31:i]
		dst[i+31:i] := a[i+31:i]
	ELSE
		dst[i+31:i] := b[i+31:i]
	FI
ENDFOR

Instruction: 'PMINUD'. Intrinsic: '_mm_min_epu32'. Requires SSE4.1.

func MinposEpu16 ¶

func MinposEpu16(a x86.M128i) (dst x86.M128i)

MinposEpu16: Horizontally compute the minimum amongst the packed unsigned 16-bit integers in 'a', store the minimum and index in 'dst', and zero the remaining bits in 'dst'.

index[2:0] := 0
min[15:0] := a[15:0]
FOR j := 0 to 7
	i := j*16
	IF a[i+15:i] < min[15:0]
		index[2:0] := j
		min[15:0] := a[i+15:i]
	FI
ENDFOR
dst[15:0] := min[15:0]
dst[18:16] := index[2:0]
dst[127:19] := 0

Instruction: 'PHMINPOSUW'. Intrinsic: '_mm_minpos_epu16'. Requires SSE4.1.

func MpsadbwEpu8 ¶

func MpsadbwEpu8(a x86.M128i, b x86.M128i, imm8 byte) (dst x86.M128i)

MpsadbwEpu8: Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in 'a' compared to those in 'b', and store the 16-bit results in 'dst'.

Eight SADs are performed using one quadruplet from 'b' and eight quadruplets from 'a'. One quadruplet is selected from 'b' starting at the offset specified in 'imm8'. Eight quadruplets are formed from sequential 8-bit integers selected from 'a' starting at the offset specified in 'imm8'.

MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
	a_offset := imm8[2]*32
	b_offset := imm8[1:0]*32
	FOR j := 0 to 7
		i := j*8
		k := a_offset+i
		l := b_offset
		tmp[i*2+15:i*2] := ABS(a[k+7:k] - b[l+7:l]) + ABS(a[k+15:k+8] - b[l+15:l+8]) + ABS(a[k+23:k+16] - b[l+23:l+16]) + ABS(a[k+31:k+24] - b[l+31:l+24])
	ENDFOR
	RETURN tmp[127:0]
}

dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])

Instruction: 'MPSADBW'. Intrinsic: '_mm_mpsadbw_epu8'. Requires SSE4.1.

FIXME: Requires compiler support (has immediate)

func MulEpi32 ¶

func MulEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MulEpi32: Multiply the low 32-bit integers from each packed 64-bit element in 'a' and 'b', and store the signed 64-bit results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := a[i+31:i] * b[i+31:i]
ENDFOR

Instruction: 'PMULDQ'. Intrinsic: '_mm_mul_epi32'. Requires SSE4.1.

func MulloEpi32 ¶

func MulloEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

MulloEpi32: Multiply the packed 32-bit integers in 'a' and 'b', producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in 'dst'.

FOR j := 0 to 3
	i := j*32
	tmp[63:0] := a[i+31:i] * b[i+31:i]
	dst[i+31:i] := tmp[31:0]
ENDFOR

Instruction: 'PMULLD'. Intrinsic: '_mm_mullo_epi32'. Requires SSE4.1.

func PackusEpi32 ¶

func PackusEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

PackusEpi32: Convert packed 32-bit integers from 'a' and 'b' to packed 16-bit integers using unsigned saturation, and store the results in 'dst'.

dst[15:0] := Saturate_Int32_To_UnsignedInt16 (a[31:0])
dst[31:16] := Saturate_Int32_To_UnsignedInt16 (a[63:32])
dst[47:32] := Saturate_Int32_To_UnsignedInt16 (a[95:64])
dst[63:48] := Saturate_Int32_To_UnsignedInt16 (a[127:96])
dst[79:64] := Saturate_Int32_To_UnsignedInt16 (b[31:0])
dst[95:80] := Saturate_Int32_To_UnsignedInt16 (b[63:32])
dst[111:96] := Saturate_Int32_To_UnsignedInt16 (b[95:64])
dst[127:112] := Saturate_Int32_To_UnsignedInt16 (b[127:96])

Instruction: 'PACKUSDW'. Intrinsic: '_mm_packus_epi32'. Requires SSE4.1.

func RoundPd ¶

func RoundPd(a x86.M128d, rounding int) (dst x86.M128d)

RoundPd: Round the packed double-precision (64-bit) floating-point elements in 'a' using the 'rounding' parameter, and store the results as packed double-precision floating-point elements in 'dst'.

Rounding is done according to the 'rounding' parameter, which can be one of:

    (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
    (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
    (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
    (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
    _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE

		FOR j := 0 to 1
			i := j*64
			dst[i+63:i] := ROUND(a[i+63:i])
		ENDFOR

Instruction: 'ROUNDPD'. Intrinsic: '_mm_round_pd'. Requires SSE4.1.

func RoundPs ¶

func RoundPs(a x86.M128, rounding int) (dst x86.M128)

RoundPs: Round the packed single-precision (32-bit) floating-point elements in 'a' using the 'rounding' parameter, and store the results as packed single-precision floating-point elements in 'dst'.

Rounding is done according to the 'rounding' parameter, which can be one of:

    (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
    (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
    (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
    (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
    _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE

		FOR j := 0 to 3
			i := j*32
			dst[i+31:i] := ROUND(a[i+31:i])
		ENDFOR

Instruction: 'ROUNDPS'. Intrinsic: '_mm_round_ps'. Requires SSE4.1.

func RoundSd ¶

func RoundSd(a x86.M128d, b x86.M128d, rounding int) (dst x86.M128d)

RoundSd: Round the lower double-precision (64-bit) floating-point element in 'b' using the 'rounding' parameter, store the result as a double-precision floating-point element in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

Rounding is done according to the 'rounding' parameter, which can be one of:

    (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
    (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
    (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
    (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
    _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE

		dst[63:0] := ROUND(b[63:0])
		dst[127:64] := a[127:64]

Instruction: 'ROUNDSD'. Intrinsic: '_mm_round_sd'. Requires SSE4.1.

func RoundSs ¶

func RoundSs(a x86.M128, b x86.M128, rounding int) (dst x86.M128)

RoundSs: Round the lower single-precision (32-bit) floating-point element in 'b' using the 'rounding' parameter, store the result as a single-precision floating-point element in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

Rounding is done according to the 'rounding' parameter, which can be one of:

    (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
    (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
    (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
    (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
    _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE

		dst[31:0] := ROUND(b[31:0])
		dst[127:32] := a[127:32]

Instruction: 'ROUNDSS'. Intrinsic: '_mm_round_ss'. Requires SSE4.1.

func StreamLoadSi128 ¶

func StreamLoadSi128(mem_addr *x86.M128i) (dst x86.M128i)

StreamLoadSi128: Load 128-bits of integer data from memory into 'dst' using a non-temporal memory hint.

'mem_addr' must be aligned on a 16-byte boundary or a general-protection exception may be generated.

dst[127:0] := MEM[mem_addr+127:mem_addr]

Instruction: 'MOVNTDQA'. Intrinsic: '_mm_stream_load_si128'. Requires SSE4.1.

FIXME: Will likely need to be reworked (has pointer parameter).

func TestAllOnes ¶

func TestAllOnes(a x86.M128i) int

TestAllOnes: Compute the bitwise NOT of 'a' and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0.

IF ((NOT a[127:0]) AND 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF == 0)
	CF := 1
ELSE
	CF := 0
FI
RETURN CF

Instruction: '...'. Intrinsic: '_mm_test_all_ones'. Requires SSE4.1.

func TestAllZeros ¶

func TestAllZeros(a x86.M128i, mask x86.M128i) int

TestAllZeros: Compute the bitwise AND of 128 bits (representing integer data) in 'a' and 'mask', and return 1 if the result is zero, otherwise return 0.

IF (a[127:0] AND mask[127:0] == 0)
	ZF := 1
ELSE
	ZF := 0
FI
RETURN ZF

Instruction: 'PTEST'. Intrinsic: '_mm_test_all_zeros'. Requires SSE4.1.

func TestMixOnesZeros ¶

func TestMixOnesZeros(a x86.M128i, mask x86.M128i) int

TestMixOnesZeros: Compute the bitwise AND of 128 bits (representing integer data) in 'a' and 'mask', and set 'ZF' to 1 if the result is zero, otherwise set 'ZF' to 0. Compute the bitwise AND NOT of 'a' and 'mask', and set 'CF' to 1 if the result is zero, otherwise set 'CF' to 0. Return 1 if both the 'ZF' and 'CF' values are zero, otherwise return 0.

IF (a[127:0] AND mask[127:0] == 0)
	ZF := 1
ELSE
	ZF := 0
FI
IF ((NOT a[127:0]) AND mask[127:0] == 0)
	CF := 1
ELSE
	CF := 0
FI
IF (ZF == 0 && CF == 0)
	RETURN 1
ELSE
	RETURN 0
FI

Instruction: 'PTEST'. Intrinsic: '_mm_test_mix_ones_zeros'. Requires SSE4.1.

func TestcSi128 ¶

func TestcSi128(a x86.M128i, b x86.M128i) int

TestcSi128: Compute the bitwise AND of 128 bits (representing integer data) in 'a' and 'b', and set 'ZF' to 1 if the result is zero, otherwise set 'ZF' to 0. Compute the bitwise AND NOT of 'a' and 'b', and set 'CF' to 1 if the result is zero, otherwise set 'CF' to 0. Return the 'CF' value.

IF (a[127:0] AND b[127:0] == 0)
	ZF := 1
ELSE
	ZF := 0
FI
IF ((NOT a[127:0]) AND b[127:0] == 0)
	CF := 1
ELSE
	CF := 0
FI
RETURN CF

Instruction: 'PTEST'. Intrinsic: '_mm_testc_si128'. Requires SSE4.1.

func TestnzcSi128 ¶

func TestnzcSi128(a x86.M128i, b x86.M128i) int

TestnzcSi128: Compute the bitwise AND of 128 bits (representing integer data) in 'a' and 'b', and set 'ZF' to 1 if the result is zero, otherwise set 'ZF' to 0. Compute the bitwise AND NOT of 'a' and 'b', and set 'CF' to 1 if the result is zero, otherwise set 'CF' to 0. Return 1 if both the 'ZF' and 'CF' values are zero, otherwise return 0.

IF (a[127:0] AND b[127:0] == 0)
	ZF := 1
ELSE
	ZF := 0
FI
IF ((NOT a[127:0]) AND b[127:0] == 0)
	CF := 1
ELSE
	CF := 0
FI
IF (ZF == 0 && CF == 0)
	RETURN 1
ELSE
	RETURN 0
FI

Instruction: 'PTEST'. Intrinsic: '_mm_testnzc_si128'. Requires SSE4.1.

func TestzSi128 ¶

func TestzSi128(a x86.M128i, b x86.M128i) int

TestzSi128: Compute the bitwise AND of 128 bits (representing integer data) in 'a' and 'b', and set 'ZF' to 1 if the result is zero, otherwise set 'ZF' to 0. Compute the bitwise AND NOT of 'a' and 'b', and set 'CF' to 1 if the result is zero, otherwise set 'CF' to 0. Return the 'ZF' value.

IF (a[127:0] AND b[127:0] == 0)
	ZF := 1
ELSE
	ZF := 0
FI
IF ((NOT a[127:0]) AND b[127:0] == 0)
	CF := 1
ELSE
	CF := 0
FI
RETURN ZF

Instruction: 'PTEST'. Intrinsic: '_mm_testz_si128'. Requires SSE4.1.

Types ¶

This section is empty.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL