Floating Point Conversion
__m128d __lsx_vfcvth_d_s (__m128 a)
Synopsis
__m128d __lsx_vfcvth_d_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfcvth.d.s vr, vr
CPU Flags: LSX
Description
Convert single precision floating point elements in higher half of a
to double precision.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = a.fp32[2 + i];
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128d __lsx_vfcvtl_d_s (__m128 a)
Synopsis
__m128d __lsx_vfcvtl_d_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfcvtl.d.s vr, vr
CPU Flags: LSX
Description
Convert single precision floating point elements in lower half of a
to double precision.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = a.fp32[i];
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)
Synopsis
__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vfcvt.s.d vr, vr, vr
CPU Flags: LSX
Description
Convert double precision floating point elements in a
and b
to single precision.
Operation
for (int i = 0; i < 4; i++) {
if (i < 2) {
dst.fp32[i] = b.fp64[i];
} else {
dst.fp32[i] = a.fp64[i - 2];
}
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128 __lsx_vfcvth_s_h (__m128i a)
Synopsis
__m128 __lsx_vfcvth_s_h (__m128i a)
#include <lsxintrin.h>
Instruction: vfcvth.s.h vr, vr
CPU Flags: LSX
Description
Convert half precision floating point elements in higher half of a
to single precision.
Operation
for (int i = 0; i < 4; i++) {
dst.fp32[i] = a.fp16[4 + i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128 __lsx_vfcvtl_s_h (__m128i a)
Synopsis
__m128 __lsx_vfcvtl_s_h (__m128i a)
#include <lsxintrin.h>
Instruction: vfcvtl.s.h vr, vr
CPU Flags: LSX
Description
Convert half precision floating point elements in lower half of a
to single precision.
Operation
for (int i = 0; i < 4; i++) {
dst.fp32[i] = a.fp16[i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128i __lsx_vfcvt_h_s (__m128 a, __m128 b)
Synopsis
__m128i __lsx_vfcvt_h_s (__m128 a, __m128 b)
#include <lsxintrin.h>
Instruction: vfcvt.h.s vr, vr, vr
CPU Flags: LSX
Description
Convert single precision floating point elements in a
and b
to half precision.
Operation
for (int i = 0; i < 8; i++) {
if (i < 4) {
dst.fp16[i] = b.fp32[i];
} else {
dst.fp16[i] = a.fp32[i - 4];
}
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m128d __lsx_vffinth_d_w (__m128i a)
Synopsis
__m128d __lsx_vffinth_d_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffinth.d.w vr, vr
CPU Flags: LSX
Description
Convert 32-bit integer elements in higher part of a
to double precision floating point numbers.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = (f64)(s32)a.word[i + 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128d __lsx_vffintl_d_w (__m128i a)
Synopsis
__m128d __lsx_vffintl_d_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffintl.d.w vr, vr
CPU Flags: LSX
Description
Convert 32-bit integer elements in lower part of a
to double precision floating point numbers.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = (f64)(s32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128d __lsx_vffint_d_l (__m128i a)
Synopsis
__m128d __lsx_vffint_d_l (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.d.l vr, vr
CPU Flags: LSX
Description
Convert signed 64-bit integer elements in a
to double-precision floating point numbers.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = (f64)(s64)a.dword[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128d __lsx_vffint_d_lu (__m128i a)
Synopsis
__m128d __lsx_vffint_d_lu (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.d.lu vr, vr
CPU Flags: LSX
Description
Convert unsigned 64-bit integer elements in a
to double-precision floating point numbers.
Operation
for (int i = 0; i < 2; i++) {
dst.fp64[i] = (f64)(u64)a.dword[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128 __lsx_vffint_s_w (__m128i a)
Synopsis
__m128 __lsx_vffint_s_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.s.w vr, vr
CPU Flags: LSX
Description
Convert signed 32-bit integer elements in a
to single-precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp32[i] = (f32)(s32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128 __lsx_vffint_s_wu (__m128i a)
Synopsis
__m128 __lsx_vffint_s_wu (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.s.wu vr, vr
CPU Flags: LSX
Description
Convert unsigned 32-bit integer elements in a
to single-precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp32[i] = (f32)(u32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128 __lsx_vffint_s_l (__m128i a, __m128i b)
Synopsis
__m128 __lsx_vffint_s_l (__m128i a, __m128i b)
#include <lsxintrin.h>
Instruction: vffint.s.l vr, vr, vr
CPU Flags: LSX
Description
Convert 64-bit integer elements in a
and b
to single-precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp32[i] =
(i < 2) ? (f32)(s64)b.dword[i]
: (f32)(s64)a.dword[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintl_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintl.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftinth_l_s (__m128 a)
Synopsis
__m128i __lsx_vftinth_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftinth.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrml_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrml_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrml.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrmh_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrmh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrmh.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrpl_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrpl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrpl.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrph_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrph_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrph.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrzl_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrzl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrzl.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrzh_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrzh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrzh.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrnel_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrnel_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrnel.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrneh_l_s (__m128 a)
Synopsis
__m128i __lsx_vftintrneh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrneh.l.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftint_l_d (__m128d a)
Synopsis
__m128i __lsx_vftint_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftint.l.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftint_w_s (__m128 a)
Synopsis
__m128i __lsx_vftint_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftint.w.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrm_l_d (__m128d a)
Synopsis
__m128i __lsx_vftintrm_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrm.l.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrm_w_s (__m128 a)
Synopsis
__m128i __lsx_vftintrm_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrm.w.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrp_l_d (__m128d a)
Synopsis
__m128i __lsx_vftintrp_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrp.l.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrp_w_s (__m128 a)
Synopsis
__m128i __lsx_vftintrp_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrp.w.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrz_l_d (__m128d a)
Synopsis
__m128i __lsx_vftintrz_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrz.l.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrz_w_s (__m128 a)
Synopsis
__m128i __lsx_vftintrz_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrz.w.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrne_l_d (__m128d a)
Synopsis
__m128i __lsx_vftintrne_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrne.l.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrne_w_s (__m128 a)
Synopsis
__m128i __lsx_vftintrne_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrne.w.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftint_lu_d (__m128d a)
Synopsis
__m128i __lsx_vftint_lu_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftint.lu.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to unsigned 64-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftint_wu_s (__m128 a)
Synopsis
__m128i __lsx_vftint_wu_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftint.wu.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to unsigned 32-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrz_lu_d (__m128d a)
Synopsis
__m128i __lsx_vftintrz_lu_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrz.lu.d vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
to unsigned 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 2; i++) {
dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftintrz_wu_s (__m128 a)
Synopsis
__m128i __lsx_vftintrz_wu_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrz.wu.s vr, vr
CPU Flags: LSX
Description
Convert single-precision floating point elements in a
to unsigned 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m128i __lsx_vftint_w_d (__m128d a, __m128d b)
Synopsis
__m128i __lsx_vftint_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftint.w.d vr, vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, using current rounding mode specified in fcsr
.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrm_w_d (__m128d a, __m128d b)
Synopsis
__m128i __lsx_vftintrm_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrm.w.d vr, vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrp_w_d (__m128d a, __m128d b)
Synopsis
__m128i __lsx_vftintrp_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrp.w.d vr, vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrz_w_d (__m128d a, __m128d b)
Synopsis
__m128i __lsx_vftintrz_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrz.w.d vr, vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m128i __lsx_vftintrne_w_d (__m128d a, __m128d b)
Synopsis
__m128i __lsx_vftintrne_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrne.w.d vr, vr, vr
CPU Flags: LSX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |