Floating Point Misc

__m128i __lsx_vfclass_d (__m128d a)

Synopsis

__m128i __lsx_vfclass_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfclass.d vr, vr
CPU Flags: LSX

Description

Classify each double-precision floating point element in a.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = fp_classify(a.fp64[i]);
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 2 4
3C5000 2 2

__m128i __lsx_vfclass_s (__m128 a)

Synopsis

__m128i __lsx_vfclass_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfclass.s vr, vr
CPU Flags: LSX

Description

Classify each single-precision floating point element in a.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = fp_classify(a.fp32[i]);
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 2 4
3C5000 2 2

__m128 __lsx_vfrint_s (__m128 a)

Synopsis

__m128 __lsx_vfrint_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfrint.s vr, vr
CPU Flags: LSX

Description

Round single-precision floating point elements in a to integers, using the current rounding mode specified in fcsr, and store as floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (fp32)(s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128d __lsx_vfrint_d (__m128d a)

Synopsis

__m128d __lsx_vfrint_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfrint.d vr, vr
CPU Flags: LSX

Description

Round double-precision floating point elements in a to integers, using the current rounding mode specified in fcsr, and store as floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (fp64)(s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128 __lsx_vfrintrp_s (__m128 a)

Synopsis

__m128 __lsx_vfrintrp_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfrintrp.s vr, vr
CPU Flags: LSX

Description

Round single-precision floating point elements in a to integers, rounding towards positive infinity, and store as floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (fp32)(s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128d __lsx_vfrintrp_d (__m128d a)

Synopsis

__m128d __lsx_vfrintrp_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfrintrp.d vr, vr
CPU Flags: LSX

Description

Round double-precision floating point elements in a to integers, rounding towards positive infinity, and store as floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (fp64)(s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128 __lsx_vfrintrm_s (__m128 a)

Synopsis

__m128 __lsx_vfrintrm_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfrintrm.s vr, vr
CPU Flags: LSX

Description

Round single-precision floating point elements in a to integers, rounding towards negative infinity, and store as floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (fp32)(s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128d __lsx_vfrintrm_d (__m128d a)

Synopsis

__m128d __lsx_vfrintrm_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfrintrm.d vr, vr
CPU Flags: LSX

Description

Round double-precision floating point elements in a to integers, rounding towards negative infinity, and store as floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (fp64)(s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128 __lsx_vfrintrz_s (__m128 a)

Synopsis

__m128 __lsx_vfrintrz_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfrintrz.s vr, vr
CPU Flags: LSX

Description

Round single-precision floating point elements in a to integers, rounding towards zero, and store as floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (fp32)(s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128d __lsx_vfrintrz_d (__m128d a)

Synopsis

__m128d __lsx_vfrintrz_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfrintrz.d vr, vr
CPU Flags: LSX

Description

Round double-precision floating point elements in a to integers, rounding towards zero, and store as floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (fp64)(s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128 __lsx_vfrintrne_s (__m128 a)

Synopsis

__m128 __lsx_vfrintrne_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfrintrne.s vr, vr
CPU Flags: LSX

Description

Round single-precision floating point elements in a to integers, rounding towards nearest even, and store as floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (fp32)(s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2

__m128d __lsx_vfrintrne_d (__m128d a)

Synopsis

__m128d __lsx_vfrintrne_d (__m128d a)
#include <lsxintrin.h>
Instruction: vfrintrne.d vr, vr
CPU Flags: LSX

Description

Round double-precision floating point elements in a to integers, rounding towards nearest even, and store as floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (fp64)(s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU Latency Throughput (IPC)
3A6000 4 2
3C5000 4 2