Memory Load & Store

__m128i __lsx_vld (void * addr, imm_n2048_2047 offset)

Synopsis

__m128i __lsx_vld (void * addr, imm_n2048_2047 offset)
#include <lsxintrin.h>
Instruction: vld vr, r, imm
CPU Flags: LSX

Description

Read whole vector from memory address addr + offset, save the data into dst. Note that you can use this intrinsic to load floating point vectors, even though the return type represents integer vectors.

Operation

dst = memory_load(128, addr + offset);

__m128i __lsx_vldx (void * addr, long int offset)

Synopsis

__m128i __lsx_vldx (void * addr, long int offset)
#include <lsxintrin.h>
Instruction: vldx vr, r, r
CPU Flags: LSX

Description

Read whole vector from memory address addr + offset, save the data into dst. Note that you can use this intrinsic to load floating point vectors, even though the return type represents integer vectors.

Operation

dst = memory_load(128, addr + offset);

__m128i __lsx_vldrepl_b (void * addr, imm_n2048_2047 offset)

Synopsis

__m128i __lsx_vldrepl_b (void * addr, imm_n2048_2047 offset)
#include <lsxintrin.h>
Instruction: vldrepl.b vr, r, imm
CPU Flags: LSX

Description

Read 8-bit data from memory address addr + (offset << 0), replicate the data to all vector lanes and save into dst.

Operation

u8 data = memory_load(8, addr + offset);
for (int i = 0; i < 16; i++) {
  dst.byte[i] = data;
}

__m128i __lsx_vldrepl_h (void * addr, imm_n1024_1023 offset)

Synopsis

__m128i __lsx_vldrepl_h (void * addr, imm_n1024_1023 offset)
#include <lsxintrin.h>
Instruction: vldrepl.h vr, r, imm
CPU Flags: LSX

Description

Read 16-bit data from memory address addr + (offset << 1), replicate the data to all vector lanes and save into dst.

Operation

u16 data = memory_load(16, addr + (offset << 1));
for (int i = 0; i < 8; i++) {
  dst.half[i] = data;
}

__m128i __lsx_vldrepl_w (void * addr, imm_n512_511 offset)

Synopsis

__m128i __lsx_vldrepl_w (void * addr, imm_n512_511 offset)
#include <lsxintrin.h>
Instruction: vldrepl.w vr, r, imm
CPU Flags: LSX

Description

Read 32-bit data from memory address addr + (offset << 2), replicate the data to all vector lanes and save into dst.

Operation

u32 data = memory_load(32, addr + (offset << 2));
for (int i = 0; i < 4; i++) {
  dst.word[i] = data;
}

__m128i __lsx_vldrepl_d (void * addr, imm_n256_255 offset)

Synopsis

__m128i __lsx_vldrepl_d (void * addr, imm_n256_255 offset)
#include <lsxintrin.h>
Instruction: vldrepl.d vr, r, imm
CPU Flags: LSX

Description

Read 64-bit data from memory address addr + (offset << 3), replicate the data to all vector lanes and save into dst.

Operation

u64 data = memory_load(64, addr + (offset << 3));
for (int i = 0; i < 2; i++) {
  dst.dword[i] = data;
}

void __lsx_vst (__m128i data, void * addr, imm_n2048_2047 offset)

Synopsis

void __lsx_vst (__m128i data, void * addr, imm_n2048_2047 offset)
#include <lsxintrin.h>
Instruction: vst vr, r, imm
CPU Flags: LSX

Description

Write whole vector data in data to memory address addr + offset.

Operation

memory_store(128, data, addr + offset);

void __lsx_vstx (__m128i data, void * addr, long int offset)

Synopsis

void __lsx_vstx (__m128i data, void * addr, long int offset)
#include <lsxintrin.h>
Instruction: vstx vr, r, r
CPU Flags: LSX

Description

Write whole-vector data in data to memory address addr + offset.

Operation

memory_store(128, data, addr + offset);

void __lsx_vstelm_b (__m128i data, void * addr, imm_n128_127 offset, imm0_15 lane)

Synopsis

void __lsx_vstelm_b (__m128i data, void * addr, imm_n128_127 offset, imm0_15 lane)
#include <lsxintrin.h>
Instruction: vstelm.b vr, r, imm, imm
CPU Flags: LSX

Description

Store the 8-bit element in data specified by lane to memory address addr + offset.

Operation

memory_store(8, data.byte[lane], addr + offset);

void __lsx_vstelm_h (__m128i data, void * addr, imm_n128_127 offset, imm0_7 lane)

Synopsis

void __lsx_vstelm_h (__m128i data, void * addr, imm_n128_127 offset, imm0_7 lane)
#include <lsxintrin.h>
Instruction: vstelm.h vr, r, imm, imm
CPU Flags: LSX

Description

Store the 16-bit element in data specified by lane to memory address addr + offset.

Operation

memory_store(16, data.half[lane], addr + offset);

void __lsx_vstelm_w (__m128i data, void * addr, imm_n128_127 offset, imm0_3 lane)

Synopsis

void __lsx_vstelm_w (__m128i data, void * addr, imm_n128_127 offset, imm0_3 lane)
#include <lsxintrin.h>
Instruction: vstelm.w vr, r, imm, imm
CPU Flags: LSX

Description

Store the 32-bit element in data specified by lane to memory address addr + offset.

Operation

memory_store(32, data.word[lane], addr + offset);

void __lsx_vstelm_d (__m128i data, void * addr, imm_n128_127 offset, imm0_1 lane)

Synopsis

void __lsx_vstelm_d (__m128i data, void * addr, imm_n128_127 offset, imm0_1 lane)
#include <lsxintrin.h>
Instruction: vstelm.d vr, r, imm, imm
CPU Flags: LSX

Description

Store the 64-bit element in data specified by lane to memory address addr + offset.

Operation

memory_store(64, data.dword[lane], addr + offset);