Floating Point Conversion
__m256d __lasx_xvfcvth_d_s (__m256 a)
Synopsis
__m256d __lasx_xvfcvth_d_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvfcvth.d.s xr, xr
CPU Flags: LASX
Description
Convert single precision floating point elements in higher half of a
to double precision.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = a.fp32[4 + i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256d __lasx_xvfcvtl_d_s (__m256 a)
Synopsis
__m256d __lasx_xvfcvtl_d_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvfcvtl.d.s xr, xr
CPU Flags: LASX
Description
Convert single precision floating point elements in lower half of a
to double precision.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = a.fp32[i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256 __lasx_xvfcvt_s_d (__m256d a, __m256d b)
Synopsis
__m256 __lasx_xvfcvt_s_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvfcvt.s.d xr, xr, xr
CPU Flags: LASX
Description
Convert double precision floating point elements in a
and b
to single precision.
Operation
for (int i = 0; i < 8; i++) {
if (i < 4) {
dst.fp32[i] = b.fp64[i];
} else {
dst.fp32[i] = a.fp64[i - 4];
}
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256 __lasx_xvfcvth_s_h (__m256i a)
Synopsis
__m256 __lasx_xvfcvth_s_h (__m256i a)
#include <lasxintrin.h>
Instruction: xvfcvth.s.h xr, xr
CPU Flags: LASX
Description
Convert half precision floating point elements in higher half of a
to single precision.
Operation
for (int i = 0; i < 8; i++) {
dst.fp32[i] = a.fp16[8 + i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256 __lasx_xvfcvtl_s_h (__m256i a)
Synopsis
__m256 __lasx_xvfcvtl_s_h (__m256i a)
#include <lasxintrin.h>
Instruction: xvfcvtl.s.h xr, xr
CPU Flags: LASX
Description
Convert half precision floating point elements in lower half of a
to single precision.
Operation
for (int i = 0; i < 8; i++) {
dst.fp32[i] = a.fp16[i];
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256i __lasx_xvfcvt_h_s (__m256 a, __m256 b)
Synopsis
__m256i __lasx_xvfcvt_h_s (__m256 a, __m256 b)
#include <lasxintrin.h>
Instruction: xvfcvt.h.s xr, xr, xr
CPU Flags: LASX
Description
Convert single precision floating point elements in a
and b
to half precision.
Operation
for (int i = 0; i < 16; i++) {
if (i < 8) {
dst.fp16[i] = b.fp32[i];
} else {
dst.fp16[i] = a.fp32[i - 8];
}
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 3 | 2 |
3C5000 | 3 | 1 |
__m256d __lasx_xvffinth_d_w (__m256i a)
Synopsis
__m256d __lasx_xvffinth_d_w (__m256i a)
#include <lasxintrin.h>
Instruction: xvffinth.d.w xr, xr
CPU Flags: LASX
Description
Convert 32-bit integer elements in higher part of a
to double precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = (f64)(s32)a.word[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256d __lasx_xvffintl_d_w (__m256i a)
Synopsis
__m256d __lasx_xvffintl_d_w (__m256i a)
#include <lasxintrin.h>
Instruction: xvffintl.d.w xr, xr
CPU Flags: LASX
Description
Convert 32-bit integer elements in lower part of a
to double precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = (f64)(s32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256d __lasx_xvffint_d_l (__m256i a)
Synopsis
__m256d __lasx_xvffint_d_l (__m256i a)
#include <lasxintrin.h>
Instruction: xvffint.d.l xr, xr
CPU Flags: LASX
Description
Convert signed 64-bit integer elements in a
to double-precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = (f64)(s64)a.dword[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256d __lasx_xvffint_d_lu (__m256i a)
Synopsis
__m256d __lasx_xvffint_d_lu (__m256i a)
#include <lasxintrin.h>
Instruction: xvffint.d.lu xr, xr
CPU Flags: LASX
Description
Convert unsigned 64-bit integer elements in a
to double-precision floating point numbers.
Operation
for (int i = 0; i < 4; i++) {
dst.fp64[i] = (f64)(u64)a.dword[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256 __lasx_xvffint_s_w (__m256i a)
Synopsis
__m256 __lasx_xvffint_s_w (__m256i a)
#include <lasxintrin.h>
Instruction: xvffint.s.w xr, xr
CPU Flags: LASX
Description
Convert signed 32-bit integer elements in a
to single-precision floating point numbers.
Operation
for (int i = 0; i < 8; i++) {
dst.fp32[i] = (f32)(s32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256 __lasx_xvffint_s_wu (__m256i a)
Synopsis
__m256 __lasx_xvffint_s_wu (__m256i a)
#include <lasxintrin.h>
Instruction: xvffint.s.wu xr, xr
CPU Flags: LASX
Description
Convert unsigned 32-bit integer elements in a
to single-precision floating point numbers.
Operation
for (int i = 0; i < 8; i++) {
dst.fp32[i] = (f32)(u32)a.word[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256 __lasx_xvffint_s_l (__m256i a, __m256i b)
Synopsis
__m256 __lasx_xvffint_s_l (__m256i a, __m256i b)
#include <lasxintrin.h>
Instruction: xvffint.s.l xr, xr, xr
CPU Flags: LASX
Description
Convert 64-bit integer elements in a
and b
to single-precision floating point numbers.
Operation
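for (int i = 0; i < 4; i++) {
dst.fp32[i] = (i < 2)
? (f32)(s64)b.dword[i]
: (f32)(s64)a.dword[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.fp32[i] = (i < 6)
? (f32)(s64)b.dword[i - 2]
: (f32)(s64)a.dword[i - 4]; // rounding mode is not expressed in C
}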
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintl_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintl_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintl.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftinth_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftinth_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftinth.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrml_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrml_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrml.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrmh_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrmh_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrmh.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrpl_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrpl_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrpl.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrph_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrph_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrph.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrzl_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrzl_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrzl.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrzh_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrzh_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrzh.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrnel_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrnel_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrnel.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in lower part of a
to 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrneh_l_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrneh_l_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrneh.l.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in higher part of a
to 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp32[i + 4]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftint_l_d (__m256d a)
Synopsis
__m256i __lasx_xvftint_l_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftint.l.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftint_w_s (__m256 a)
Synopsis
__m256i __lasx_xvftint_w_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftint.w.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrm_l_d (__m256d a)
Synopsis
__m256i __lasx_xvftintrm_l_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftintrm.l.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrm_w_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrm_w_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrm.w.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrp_l_d (__m256d a)
Synopsis
__m256i __lasx_xvftintrp_l_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftintrp.l.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrp_w_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrp_w_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrp.w.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrz_l_d (__m256d a)
Synopsis
__m256i __lasx_xvftintrz_l_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftintrz.l.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrz_w_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrz_w_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrz.w.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrne_l_d (__m256d a)
Synopsis
__m256i __lasx_xvftintrne_l_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftintrne.l.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to signed 64-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrne_w_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrne_w_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrne.w.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to signed 32-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftint_lu_d (__m256d a)
Synopsis
__m256i __lasx_xvftint_lu_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftint.lu.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to unsigned 64-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftint_wu_s (__m256 a)
Synopsis
__m256i __lasx_xvftint_wu_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftint.wu.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to unsigned 32-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrz_lu_d (__m256d a)
Synopsis
__m256i __lasx_xvftintrz_lu_d (__m256d a)
#include <lasxintrin.h>
Instruction: xvftintrz.lu.d xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
to unsigned 64-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftintrz_wu_s (__m256 a)
Synopsis
__m256i __lasx_xvftintrz_wu_s (__m256 a)
#include <lasxintrin.h>
Instruction: xvftintrz.wu.s xr, xr
CPU Flags: LASX
Description
Convert single-precision floating point elements in a
to unsigned 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 8; i++) {
dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 4 | 4 |
3C5000 | 4 | 2 |
__m256i __lasx_xvftint_w_d (__m256d a, __m256d b)
Synopsis
__m256i __lasx_xvftint_w_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvftint.w.d xr, xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, using current rounding mode specified in fcsr.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.word[i] = (i < 6)
? (s32)b.fp64[i - 2]
: (s32)a.fp64[i - 4]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrm_w_d (__m256d a, __m256d b)
Synopsis
__m256i __lasx_xvftintrm_w_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvftintrm.w.d xr, xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards negative infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.word[i] = (i < 6)
? (s32)b.fp64[i - 2]
: (s32)a.fp64[i - 4]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrp_w_d (__m256d a, __m256d b)
Synopsis
__m256i __lasx_xvftintrp_w_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvftintrp.w.d xr, xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards positive infinity.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.word[i] = (i < 6)
? (s32)b.fp64[i - 2]
: (s32)a.fp64[i - 4]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrz_w_d (__m256d a, __m256d b)
Synopsis
__m256i __lasx_xvftintrz_w_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvftintrz.w.d xr, xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards zero.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.word[i] = (i < 6)
? (s32)b.fp64[i - 2]
: (s32)a.fp64[i - 4]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |
__m256i __lasx_xvftintrne_w_d (__m256d a, __m256d b)
Synopsis
__m256i __lasx_xvftintrne_w_d (__m256d a, __m256d b)
#include <lasxintrin.h>
Instruction: xvftintrne.w.d xr, xr, xr
CPU Flags: LASX
Description
Convert double-precision floating point elements in a
and b
to 32-bit integer, rounding towards nearest even.
Operation
for (int i = 0; i < 4; i++) {
dst.word[i] = (i < 2)
? (s32)b.fp64[i]
: (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}
for (int i = 4; i < 8; i++) {
dst.word[i] = (i < 6)
? (s32)b.fp64[i - 2]
: (s32)a.fp64[i - 4]; // rounding mode is not expressed in C
}
Tested on real machine.
Latency and Throughput
CPU | Latency | Throughput (IPC) |
---|---|---|
3A6000 | 5 | 2 |
3C5000 | 5 | 1 |