Memory Load & Store
__m256i __lasx_xvld (void * addr, imm_n2048_2047 offset)
Synopsis
__m256i __lasx_xvld (void * addr, imm_n2048_2047 offset)
#include <lasxintrin.h>
Instruction: xvld xr, r, imm
CPU Flags: LASX
Description
Read whole vector from memory address addr + offset
, save the data into dst
. Note that you can use this intrinsic to load floating point vectors, even though the return type represents integer vectors.
Operation
dst = memory_load(256, addr + offset);
__m256i __lasx_xvldx (void * addr, long int offset)
Synopsis
__m256i __lasx_xvldx (void * addr, long int offset)
#include <lasxintrin.h>
Instruction: xvldx xr, r, r
CPU Flags: LASX
Description
Read whole vector from memory address addr + offset
, save the data into dst
. Note that you can use this intrinsic to load floating point vectors, even though the return type represents integer vectors.
Operation
dst = memory_load(256, addr + offset);
__m256i __lasx_xvldrepl_b (void * addr, imm_n2048_2047 offset)
Synopsis
__m256i __lasx_xvldrepl_b (void * addr, imm_n2048_2047 offset)
#include <lasxintrin.h>
Instruction: xvldrepl.b xr, r, imm
CPU Flags: LASX
Description
Read 8-bit data from memory address addr + (offset << 0)
, replicate the data to all vector lanes and save into dst
.
Operation
u8 data = memory_load(8, addr + offset);
for (int i = 0; i < 32; i++) {
dst.byte[i] = data;
}
__m256i __lasx_xvldrepl_h (void * addr, imm_n1024_1023 offset)
Synopsis
__m256i __lasx_xvldrepl_h (void * addr, imm_n1024_1023 offset)
#include <lasxintrin.h>
Instruction: xvldrepl.h xr, r, imm
CPU Flags: LASX
Description
Read 16-bit data from memory address addr + (offset << 1)
, replicate the data to all vector lanes and save into dst
.
Operation
u16 data = memory_load(16, addr + (offset << 1));
for (int i = 0; i < 16; i++) {
dst.half[i] = data;
}
__m256i __lasx_xvldrepl_w (void * addr, imm_n512_511 offset)
Synopsis
__m256i __lasx_xvldrepl_w (void * addr, imm_n512_511 offset)
#include <lasxintrin.h>
Instruction: xvldrepl.w xr, r, imm
CPU Flags: LASX
Description
Read 32-bit data from memory address addr + (offset << 2)
, replicate the data to all vector lanes and save into dst
.
Operation
u32 data = memory_load(32, addr + (offset << 2));
for (int i = 0; i < 8; i++) {
dst.word[i] = data;
}
__m256i __lasx_xvldrepl_d (void * addr, imm_n256_255 offset)
Synopsis
__m256i __lasx_xvldrepl_d (void * addr, imm_n256_255 offset)
#include <lasxintrin.h>
Instruction: xvldrepl.d xr, r, imm
CPU Flags: LASX
Description
Read 64-bit data from memory address addr + (offset << 3)
, replicate the data to all vector lanes and save into dst
.
Operation
u64 data = memory_load(64, addr + (offset << 3));
for (int i = 0; i < 4; i++) {
dst.dword[i] = data;
}
void __lasx_xvst (__m256i data, void * addr, imm_n2048_2047 offset)
Synopsis
void __lasx_xvst (__m256i data, void * addr, imm_n2048_2047 offset)
#include <lasxintrin.h>
Instruction: xvst xr, r, imm
CPU Flags: LASX
Description
Write whole vector data in data
to memory address addr + offset
.
Operation
memory_store(256, data, addr + offset);
void __lasx_xvstx (__m256i data, void * addr, long int offset)
Synopsis
void __lasx_xvstx (__m256i data, void * addr, long int offset)
#include <lasxintrin.h>
Instruction: xvstx xr, r, r
CPU Flags: LASX
Description
Write whole-vector data in data
to memory address addr + offset
.
Operation
memory_store(256, data, addr + offset);
void __lasx_xvstelm_b (__m256i data, void * addr, imm_n128_127 offset, imm0_31 lane)
Synopsis
void __lasx_xvstelm_b (__m256i data, void * addr, imm_n128_127 offset, imm0_31 lane)
#include <lasxintrin.h>
Instruction: xvstelm.b xr, r, imm, imm
CPU Flags: LASX
Description
Store the 8-bit element in data
specified by lane
to memory address addr + offset
.
Operation
memory_store(8, data.byte[lane], addr + offset);
void __lasx_xvstelm_h (__m256i data, void * addr, imm_n128_127 offset, imm0_15 lane)
Synopsis
void __lasx_xvstelm_h (__m256i data, void * addr, imm_n128_127 offset, imm0_15 lane)
#include <lasxintrin.h>
Instruction: xvstelm.h xr, r, imm, imm
CPU Flags: LASX
Description
Store the 16-bit element in data
specified by lane
to memory address addr + offset
.
Operation
memory_store(16, data.half[lane], addr + offset);
void __lasx_xvstelm_w (__m256i data, void * addr, imm_n128_127 offset, imm0_7 lane)
Synopsis
void __lasx_xvstelm_w (__m256i data, void * addr, imm_n128_127 offset, imm0_7 lane)
#include <lasxintrin.h>
Instruction: xvstelm.w xr, r, imm, imm
CPU Flags: LASX
Description
Store the 32-bit element in data
specified by lane
to memory address addr + offset
.
Operation
memory_store(32, data.word[lane], addr + offset);
void __lasx_xvstelm_d (__m256i data, void * addr, imm_n128_127 offset, imm0_3 lane)
Synopsis
void __lasx_xvstelm_d (__m256i data, void * addr, imm_n128_127 offset, imm0_3 lane)
#include <lasxintrin.h>
Instruction: xvstelm.d xr, r, imm, imm
CPU Flags: LASX
Description
Store the 64-bit element in data
specified by lane
to memory address addr + offset
.
Operation
memory_store(64, data.dword[lane], addr + offset);