Floating Point Conversion

__m128d __lsx_vfcvth_d_s (__m128 a)

Synopsis

__m128d __lsx_vfcvth_d_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfcvth.d.s vr, vr
CPU Flags: LSX

Description

Convert single precision floating point elements in higher half of a to double precision.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = a.fp32[2 + i];
}

Tested on real machine.

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128d __lsx_vfcvtl_d_s (__m128 a)

Synopsis

__m128d __lsx_vfcvtl_d_s (__m128 a)
#include <lsxintrin.h>
Instruction: vfcvtl.d.s vr, vr
CPU Flags: LSX

Description

Convert single precision floating point elements in lower half of a to double precision.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = a.fp32[i];
}

Tested on real machine.

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)

Synopsis

__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vfcvt.s.d vr, vr, vr
CPU Flags: LSX

Description

Convert double precision floating point elements in a and b to single precision.

Operation

for (int i = 0; i < 4; i++) {
  if (i < 2) {
    dst.fp32[i] = b.fp64[i];
  } else {
    dst.fp32[i] = a.fp64[i - 2];
  }
}

Tested on real machine.

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128 __lsx_vfcvth_s_h (__m128i a)

Synopsis

__m128 __lsx_vfcvth_s_h (__m128i a)
#include <lsxintrin.h>
Instruction: vfcvth.s.h vr, vr
CPU Flags: LSX

Description

Convert half precision floating point elements in higher half of a to single precision.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = a.fp16[4 + i];
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128 __lsx_vfcvtl_s_h (__m128i a)

Synopsis

__m128 __lsx_vfcvtl_s_h (__m128i a)
#include <lsxintrin.h>
Instruction: vfcvtl.s.h vr, vr
CPU Flags: LSX

Description

Convert half precision floating point elements in lower half of a to single precision.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = a.fp16[i];
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128i __lsx_vfcvt_h_s (__m128 a, __m128 b)

Synopsis

__m128i __lsx_vfcvt_h_s (__m128 a, __m128 b)
#include <lsxintrin.h>
Instruction: vfcvt.h.s vr, vr, vr
CPU Flags: LSX

Description

Convert single precision floating point elements in a and b to half precision.

Operation

for (int i = 0; i < 8; i++) {
  if (i < 4) {
    dst.fp16[i] = b.fp32[i];
  } else {
    dst.fp16[i] = a.fp32[i - 4];
  }
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	3	2
3C5000	3	1

__m128d __lsx_vffinth_d_w (__m128i a)

Synopsis

__m128d __lsx_vffinth_d_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffinth.d.w vr, vr
CPU Flags: LSX

Description

Convert signed 32-bit integer elements in higher part of a to double precision floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (f64)(s32)a.word[i + 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128d __lsx_vffintl_d_w (__m128i a)

Synopsis

__m128d __lsx_vffintl_d_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffintl.d.w vr, vr
CPU Flags: LSX

Description

Convert signed 32-bit integer elements in lower part of a to double precision floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (f64)(s32)a.word[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128d __lsx_vffint_d_l (__m128i a)

Synopsis

__m128d __lsx_vffint_d_l (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.d.l vr, vr
CPU Flags: LSX

Description

Convert signed 64-bit integer elements in a to double-precision floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (f64)(s64)a.dword[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128d __lsx_vffint_d_lu (__m128i a)

Synopsis

__m128d __lsx_vffint_d_lu (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.d.lu vr, vr
CPU Flags: LSX

Description

Convert unsigned 64-bit integer elements in a to double-precision floating point numbers.

Operation

for (int i = 0; i < 2; i++) {
  dst.fp64[i] = (f64)(u64)a.dword[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128 __lsx_vffint_s_w (__m128i a)

Synopsis

__m128 __lsx_vffint_s_w (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.s.w vr, vr
CPU Flags: LSX

Description

Convert signed 32-bit integer elements in a to single-precision floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (f32)(s32)a.word[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128 __lsx_vffint_s_wu (__m128i a)

Synopsis

__m128 __lsx_vffint_s_wu (__m128i a)
#include <lsxintrin.h>
Instruction: vffint.s.wu vr, vr
CPU Flags: LSX

Description

Convert unsigned 32-bit integer elements in a to single-precision floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] = (f32)(u32)a.word[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128 __lsx_vffint_s_l (__m128i a, __m128i b)

Synopsis

__m128 __lsx_vffint_s_l (__m128i a, __m128i b)
#include <lsxintrin.h>
Instruction: vffint.s.l vr, vr, vr
CPU Flags: LSX

Description

Convert signed 64-bit integer elements in a and b to single-precision floating point numbers.

Operation

for (int i = 0; i < 4; i++) {
  dst.fp32[i] =
      (i < 2) ? (f32)(s64)b.dword[i]
              : (f32)(s64)a.dword[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintl_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintl.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in lower part of a to 64-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}

Tested on real machine.

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftinth_l_s (__m128 a)

Synopsis

__m128i __lsx_vftinth_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftinth.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in higher part of a to 64-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}

Tested on real machine.

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrml_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrml_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrml.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in lower part of a to 64-bit integer, rounding towards negative infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrmh_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrmh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrmh.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in higher part of a to 64-bit integer, rounding towards negative infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrpl_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrpl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrpl.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in lower part of a to 64-bit integer, rounding towards positive infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrph_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrph_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrph.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in higher part of a to 64-bit integer, rounding towards positive infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrzl_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrzl_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrzl.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in lower part of a to 64-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrzh_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrzh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrzh.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in higher part of a to 64-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrnel_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrnel_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrnel.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in lower part of a to 64-bit integer, rounding towards nearest even.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrneh_l_s (__m128 a)

Synopsis

__m128i __lsx_vftintrneh_l_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrneh.l.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in higher part of a to 64-bit integer, rounding towards nearest even.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp32[i + 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftint_l_d (__m128d a)

Synopsis

__m128i __lsx_vftint_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftint.l.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to signed 64-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftint_w_s (__m128 a)

Synopsis

__m128i __lsx_vftint_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftint.w.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to signed 32-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrm_l_d (__m128d a)

Synopsis

__m128i __lsx_vftintrm_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrm.l.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to signed 64-bit integer, rounding towards negative infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrm_w_s (__m128 a)

Synopsis

__m128i __lsx_vftintrm_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrm.w.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to signed 32-bit integer, rounding towards negative infinity.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrp_l_d (__m128d a)

Synopsis

__m128i __lsx_vftintrp_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrp.l.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to signed 64-bit integer, rounding towards positive infinity.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrp_w_s (__m128 a)

Synopsis

__m128i __lsx_vftintrp_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrp.w.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to signed 32-bit integer, rounding towards positive infinity.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrz_l_d (__m128d a)

Synopsis

__m128i __lsx_vftintrz_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrz.l.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to signed 64-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrz_w_s (__m128 a)

Synopsis

__m128i __lsx_vftintrz_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrz.w.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to signed 32-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrne_l_d (__m128d a)

Synopsis

__m128i __lsx_vftintrne_l_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrne.l.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to signed 64-bit integer, rounding towards nearest even.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (s64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrne_w_s (__m128 a)

Synopsis

__m128i __lsx_vftintrne_w_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrne.w.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to signed 32-bit integer, rounding towards nearest even.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (s32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftint_lu_d (__m128d a)

Synopsis

__m128i __lsx_vftint_lu_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftint.lu.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to unsigned 64-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftint_wu_s (__m128 a)

Synopsis

__m128i __lsx_vftint_wu_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftint.wu.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to unsigned 32-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrz_lu_d (__m128d a)

Synopsis

__m128i __lsx_vftintrz_lu_d (__m128d a)
#include <lsxintrin.h>
Instruction: vftintrz.lu.d vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a to unsigned 64-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 2; i++) {
  dst.dword[i] = (u64)a.fp64[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftintrz_wu_s (__m128 a)

Synopsis

__m128i __lsx_vftintrz_wu_s (__m128 a)
#include <lsxintrin.h>
Instruction: vftintrz.wu.s vr, vr
CPU Flags: LSX

Description

Convert single-precision floating point elements in a to unsigned 32-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (u32)a.fp32[i]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	4	4
3C5000	4	2

__m128i __lsx_vftint_w_d (__m128d a, __m128d b)

Synopsis

__m128i __lsx_vftint_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftint.w.d vr, vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a and b to 32-bit integer, using current rounding mode specified in FCSR.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (i < 2)
                    ? (s32)b.fp64[i]
                    : (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrm_w_d (__m128d a, __m128d b)

Synopsis

__m128i __lsx_vftintrm_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrm.w.d vr, vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a and b to 32-bit integer, rounding towards negative infinity.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (i < 2)
                    ? (s32)b.fp64[i]
                    : (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrp_w_d (__m128d a, __m128d b)

Synopsis

__m128i __lsx_vftintrp_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrp.w.d vr, vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a and b to 32-bit integer, rounding towards positive infinity.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (i < 2)
                    ? (s32)b.fp64[i]
                    : (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrz_w_d (__m128d a, __m128d b)

Synopsis

__m128i __lsx_vftintrz_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrz.w.d vr, vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a and b to 32-bit integer, rounding towards zero.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (i < 2)
                    ? (s32)b.fp64[i]
                    : (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1

__m128i __lsx_vftintrne_w_d (__m128d a, __m128d b)

Synopsis

__m128i __lsx_vftintrne_w_d (__m128d a, __m128d b)
#include <lsxintrin.h>
Instruction: vftintrne.w.d vr, vr, vr
CPU Flags: LSX

Description

Convert double-precision floating point elements in a and b to 32-bit integer, rounding towards nearest even.

Operation

for (int i = 0; i < 4; i++) {
  dst.word[i] = (i < 2)
                    ? (s32)b.fp64[i]
                    : (s32)a.fp64[i - 2]; // rounding mode is not expressed in C
}

Latency and Throughput

CPU	Latency	Throughput (CPI)
3A6000	5	2
3C5000	5	1