Commit fc3c4560 authored by Parag Salasakar, committed by Gerrit Code Review
Browse files

Merge "mips msa vp9 common macro comments updated"

parents 1e77058b 3c353e58
......@@ -229,13 +229,12 @@
#endif // (__mips_isa_rev >= 6)
/* Description : Load 4 words with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1, out2, out3
Details : Loads word in 'out0' from (psrc)
Loads word in 'out1' from (psrc + stride)
Loads word in 'out2' from (psrc + 2 * stride)
Loads word in 'out3' from (psrc + 3 * stride)
Details : Load word in 'out0' from (psrc)
Load word in 'out1' from (psrc + stride)
Load word in 'out2' from (psrc + 2 * stride)
Load word in 'out3' from (psrc + 3 * stride)
*/
#define LW4(psrc, stride, out0, out1, out2, out3) { \
out0 = LW((psrc)); \
......@@ -245,11 +244,10 @@
}
/* Description : Load double words with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Details : Loads double word in 'out0' from (psrc)
Loads double word in 'out1' from (psrc + stride)
Details : Load double word in 'out0' from (psrc)
Load double word in 'out1' from (psrc + stride)
*/
#define LD2(psrc, stride, out0, out1) { \
out0 = LD((psrc)); \
......@@ -261,11 +259,11 @@
}
/* Description : Store 4 words with stride
Arguments : Inputs - in0, in1, in2, in3, pdst, stride
Details : Stores word from 'in0' to (pdst)
Stores word from 'in1' to (pdst + stride)
Stores word from 'in2' to (pdst + 2 * stride)
Stores word from 'in3' to (pdst + 3 * stride)
Arguments : Inputs - in0, in1, in2, in3, pdst, stride
Details : Store word from 'in0' to (pdst)
Store word from 'in1' to (pdst + stride)
Store word from 'in2' to (pdst + 2 * stride)
Store word from 'in3' to (pdst + 3 * stride)
*/
#define SW4(in0, in1, in2, in3, pdst, stride) { \
SW(in0, (pdst)) \
......@@ -275,11 +273,11 @@
}
/* Description : Store 4 double words with stride
Arguments : Inputs - in0, in1, in2, in3, pdst, stride
Details : Stores double word from 'in0' to (pdst)
Stores double word from 'in1' to (pdst + stride)
Stores double word from 'in2' to (pdst + 2 * stride)
Stores double word from 'in3' to (pdst + 3 * stride)
Arguments : Inputs - in0, in1, in2, in3, pdst, stride
Details : Store double word from 'in0' to (pdst)
Store double word from 'in1' to (pdst + stride)
Store double word from 'in2' to (pdst + 2 * stride)
Store double word from 'in3' to (pdst + 3 * stride)
*/
#define SD4(in0, in1, in2, in3, pdst, stride) { \
SD(in0, (pdst)) \
......@@ -289,12 +287,11 @@
}
/* Description : Load vectors with 16 byte elements with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Return Type - as per RTYPE
Details : Loads 16 byte elements in 'out0' from (psrc)
Loads 16 byte elements in 'out1' from (psrc + stride)
Details : Load 16 byte elements in 'out0' from (psrc)
Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
out0 = LD_B(RTYPE, (psrc)); \
......@@ -333,11 +330,10 @@
/* LD_SB8: variadic alias that forwards its arguments to LD_B8 with v16i8
   prepended as the leading argument.
   NOTE(review): presumably LD_B8's RTYPE, as in LD_B2 - LD_B8 is not shown
   here, confirm. */
#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
/* Description : Load vectors with 8 halfword elements with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Details : Loads 8 halfword elements in 'out0' from (psrc)
Loads 8 halfword elements in 'out1' from (psrc + stride)
Details : Load 8 halfword elements in 'out0' from (psrc)
Load 8 halfword elements in 'out1' from (psrc + stride)
*/
#define LD_H2(RTYPE, psrc, stride, out0, out1) { \
out0 = LD_H(RTYPE, (psrc)); \
......@@ -368,9 +364,9 @@
}
/* LD_SH16: variadic alias that forwards its arguments to LD_H16 with v8i16
   prepended as the leading argument.
   NOTE(review): presumably LD_H16's RTYPE, as in LD_H2 - confirm. */
#define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)
/* Description : Load as 4x4 block of signed halfword elements from 1D source
/* Description : Load 4x4 block of signed halfword elements from 1D source
data into 4 vectors (Each vector with 4 signed halfwords)
Arguments : Inputs - psrc
Arguments : Input - psrc
Outputs - out0, out1, out2, out3
*/
#define LD4x4_SH(psrc, out0, out1, out2, out3) { \
......@@ -381,8 +377,7 @@
}
/* Description : Load 2 vectors of signed word elements with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Return Type - signed word
*/
......@@ -392,10 +387,9 @@
}
/* Description : Store vectors of 16 byte elements with stride
Arguments : Inputs - in0, in1, stride
Outputs - pdst (destination pointer to store to)
Details : Stores 16 byte elements from 'in0' to (pdst)
Stores 16 byte elements from 'in1' to (pdst + stride)
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 16 byte elements from 'in0' to (pdst)
Store 16 byte elements from 'in1' to (pdst + stride)
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
ST_B(RTYPE, in0, (pdst)); \
......@@ -417,10 +411,9 @@
/* ST_UB8: variadic alias that forwards its arguments to ST_B8 with v16u8
   prepended as the leading argument.
   NOTE(review): presumably ST_B8's RTYPE, as in ST_B2 - confirm. */
#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
/* Description : Store vectors of 8 halfword elements with stride
Arguments : Inputs - in0, in1, stride
Outputs - pdst (destination pointer to store to)
Details : Stores 8 halfword elements from 'in0' to (pdst)
Stores 8 halfword elements from 'in1' to (pdst + stride)
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 8 halfword elements from 'in0' to (pdst)
Store 8 halfword elements from 'in1' to (pdst + stride)
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) { \
ST_H(RTYPE, in0, (pdst)); \
......@@ -441,8 +434,7 @@
/* ST_SH8: variadic alias that forwards its arguments to ST_H8 with v8i16
   prepended as the leading argument.
   NOTE(review): presumably ST_H8's RTYPE, as in ST_H2 - confirm. */
#define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)
/* Description : Store vectors of word elements with stride
Arguments : Inputs - in0, in1, stride
- pdst (destination pointer to store to)
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 4 word elements from 'in0' to (pdst)
Store 4 word elements from 'in1' to (pdst + stride)
*/
......@@ -451,17 +443,16 @@
ST_SW(in1, (pdst) + stride); \
}
/* Description : Store as 2x4 byte block to destination memory from input vector
Arguments : Inputs - in, stidx, pdst, stride
Return Type - unsigned byte
Details : Index stidx halfword element from 'in' vector is copied and
stored on first line
Index stidx+1 halfword element from 'in' vector is copied and
stored on second line
Index stidx+2 halfword element from 'in' vector is copied and
stored on third line
Index stidx+3 halfword element from 'in' vector is copied and
stored on fourth line
/* Description : Store 2x4 byte block to destination memory from input vector
Arguments : Inputs - in, stidx, pdst, stride
Details : Index 'stidx' halfword element from 'in' vector is copied to
the GP register and stored to (pdst)
Index 'stidx+1' halfword element from 'in' vector is copied to
the GP register and stored to (pdst + stride)
Index 'stidx+2' halfword element from 'in' vector is copied to
the GP register and stored to (pdst + 2 * stride)
Index 'stidx+3' halfword element from 'in' vector is copied to
the GP register and stored to (pdst + 3 * stride)
*/
#define ST2x4_UB(in, stidx, pdst, stride) { \
uint16_t out0_m, out1_m, out2_m, out3_m; \
......@@ -479,10 +470,10 @@
}
/* Description : Store 4x2 byte block to destination memory from input vector
Arguments : Inputs - in, pdst, stride
Details : Index 0 word element from 'in' vector is copied to a GP
Arguments : Inputs - in, pdst, stride
Details : Index 0 word element from 'in' vector is copied to the GP
register and stored to (pdst)
Index 1 word element from 'in' vector is copied to a GP
Index 1 word element from 'in' vector is copied to the GP
register and stored to (pdst + stride)
*/
#define ST4x2_UB(in, pdst, stride) { \
......@@ -496,17 +487,16 @@
SW(out1_m, pblk_4x2_m + stride); \
}
/* Description : Store as 4x4 byte block to destination memory from input vector
Arguments : Inputs - in0, in1, pdst, stride
Return Type - unsigned byte
Details : Idx0 word element from input vector 'in0' is copied and stored
on first line
Idx1 word element from input vector 'in0' is copied and stored
on second line
Idx2 word element from input vector 'in1' is copied and stored
on third line
Idx3 word element from input vector 'in1' is copied and stored
on fourth line
/* Description : Store 4x4 byte block to destination memory from input vector
Arguments : Inputs - in0, in1, pdst, stride
Details : 'Idx0' word element from input vector 'in0' is copied to the
GP register and stored to (pdst)
'Idx1' word element from input vector 'in0' is copied to the
GP register and stored to (pdst + stride)
'Idx2' word element from input vector 'in1' is copied to the
GP register and stored to (pdst + 2 * stride)
'Idx3' word element from input vector 'in1' is copied to the
GP register and stored to (pdst + 3 * stride)
*/
#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) { \
uint32_t out0_m, out1_m, out2_m, out3_m; \
......@@ -526,10 +516,10 @@
ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \
}
/* Description : Store as 8x1 byte block to destination memory from input vector
Arguments : Inputs - in, pdst
Details : Index 0 double word element from input vector 'in' is copied
and stored to destination memory at (pdst)
/* Description : Store 8x1 byte block to destination memory from input vector
Arguments : Inputs - in, pdst
Details : Index 0 double word element from 'in' vector is copied to the
GP register and stored to (pdst)
*/
#define ST8x1_UB(in, pdst) { \
uint64_t out0_m; \
......@@ -538,12 +528,12 @@
SD(out0_m, pdst); \
}
/* Description : Store as 8x2 byte block to destination memory from input vector
Arguments : Inputs - in, pdst, stride
Details : Index 0 double word element from input vector 'in' is copied
and stored to destination memory at (pdst)
Index 1 double word element from input vector 'in' is copied
and stored to destination memory at (pdst + stride)
/* Description : Store 8x2 byte block to destination memory from input vector
Arguments : Inputs - in, pdst, stride
Details : Index 0 double word element from 'in' vector is copied to the
GP register and stored to (pdst)
Index 1 double word element from 'in' vector is copied to the
GP register and stored to (pdst + stride)
*/
#define ST8x2_UB(in, pdst, stride) { \
uint64_t out0_m, out1_m; \
......@@ -556,17 +546,17 @@
SD(out1_m, pblk_8x2_m + stride); \
}
/* Description : Store as 8x4 byte block to destination memory from input
/* Description : Store 8x4 byte block to destination memory from input
vectors
Arguments : Inputs - in0, in1, pdst, stride
Details : Index 0 double word element from input vector 'in0' is copied
and stored to destination memory at (pblk_8x4_m)
Index 1 double word element from input vector 'in0' is copied
and stored to destination memory at (pblk_8x4_m + stride)
Index 0 double word element from input vector 'in1' is copied
and stored to destination memory at (pblk_8x4_m + 2 * stride)
Index 1 double word element from input vector 'in1' is copied
and stored to destination memory at (pblk_8x4_m + 3 * stride)
Arguments : Inputs - in0, in1, pdst, stride
Details : Index 0 double word element from 'in0' vector is copied to the
GP register and stored to (pdst)
Index 1 double word element from 'in0' vector is copied to the
GP register and stored to (pdst + stride)
Index 0 double word element from 'in1' vector is copied to the
GP register and stored to (pdst + 2 * stride)
Index 1 double word element from 'in1' vector is copied to the
GP register and stored to (pdst + 3 * stride)
*/
#define ST8x4_UB(in0, in1, pdst, stride) { \
uint64_t out0_m, out1_m, out2_m, out3_m; \
......@@ -583,14 +573,10 @@
/* Description : Average with rounding (in0 + in1 + 1) / 2
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - signed byte
Details : Each byte element from 'in0' vector is added with each byte
element from 'in1' vector. The addition of the elements plus 1
(for rounding) is done unsigned with full precision,
i.e. the result has one extra bit. Unsigned division by 2
(or logical shift right by one bit) is performed before writing
the result to vector 'out0'
Similar for the pair of 'in2' and 'in3'
Return Type - as per RTYPE
Details : Each unsigned byte element from 'in0' vector is added with
each unsigned byte element from 'in1' vector. Then average
with rounding is calculated and written to 'out0'
*/
#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \
......@@ -605,12 +591,12 @@
}
/* AVER_UB4_UB: variadic alias that forwards its arguments to AVER_UB4 with
   v16u8 prepended as the leading argument.
   NOTE(review): presumably AVER_UB4's RTYPE, as in AVER_UB2 - confirm. */
#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
/* Description : Immediate number of columns to slide with zero
/* Description : Immediate number of elements to slide with zero
Arguments : Inputs - in0, in1, slide_val
Outputs - out0, out1
Return Type - as per RTYPE
Details : Byte elements from 'zero_m' vector are slid into 'in0' by
number of elements specified by 'slide_val'
value specified in the 'slide_val'
*/
#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) { \
v16i8 zero_m = { 0 }; \
......@@ -626,12 +612,12 @@
}
/* SLDI_B4_0_UB: variadic alias that forwards its arguments to SLDI_B4_0 with
   v16u8 prepended as the leading argument.
   NOTE(review): presumably SLDI_B4_0's RTYPE, as in SLDI_B2_0 - confirm. */
#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
/* Description : Immediate number of columns to slide
/* Description : Immediate number of elements to slide
Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
Outputs - out0, out1
Return Type - as per RTYPE
Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by
number of elements specified by 'slide_val'
value specified in the 'slide_val'
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \
out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
......@@ -651,10 +637,8 @@
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Selective byte elements from in0 & in1 are copied to out0 as
per control vector mask0
Selective byte elements from in2 & in3 are copied to out1 as
per control vector mask1
Details : Byte elements from 'in0' & 'in1' are copied selectively to
'out0' as per control vector 'mask0'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
......@@ -673,16 +657,14 @@
/* VSHF_B4_SH: variadic alias that forwards its arguments to VSHF_B4 with
   v8i16 prepended as the leading argument.
   NOTE(review): presumably VSHF_B4's RTYPE, as in VSHF_B2 - confirm. */
#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__)
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - unsigned halfword
Details : Unsigned byte elements from mult0 are multiplied with
unsigned byte elements from cnst0 producing a result
Return Type - as per RTYPE
Details : Unsigned byte elements from 'mult0' are multiplied with
unsigned byte elements from 'cnst0' producing a result
twice the size of input i.e. unsigned halfword.
Then this multiplication results of adjacent odd-even elements
are added together and stored to the out vector
(2 unsigned halfword results)
The multiplication results of adjacent odd-even elements
are added together and written to the 'out0' vector
*/
#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \
......@@ -699,16 +681,14 @@
/* DOTP_UB4_UH: variadic alias that forwards its arguments to DOTP_UB4 with
   v8u16 prepended as the leading argument.
   NOTE(review): presumably DOTP_UB4's RTYPE, as in DOTP_UB2 - confirm. */
#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - signed halfword
Details : Signed byte elements from mult0 are multiplied with
signed byte elements from cnst0 producing a result
Return Type - as per RTYPE
Details : Signed byte elements from 'mult0' are multiplied with
signed byte elements from 'cnst0' producing a result
twice the size of input i.e. signed halfword.
Then this multiplication results of adjacent odd-even elements
are added together and stored to the out vector
(2 signed halfword results)
The multiplication results of adjacent odd-even elements
are added together and written to the 'out0' vector
*/
#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \
......@@ -724,16 +704,14 @@
/* DOTP_SB4_SH: variadic alias that forwards its arguments to DOTP_SB4 with
   v8i16 prepended as the leading argument.
   NOTE(review): presumably DOTP_SB4's RTYPE, as in DOTP_SB2 - confirm. */
#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
/* Description : Dot product of halfword vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - signed word
Details : Signed halfword elements from mult0 are multiplied with
signed halfword elements from cnst0 producing a result
Return Type - as per RTYPE
Details : Signed halfword elements from 'mult0' are multiplied with
signed halfword elements from 'cnst0' producing a result
twice the size of input i.e. signed word.
Then this multiplication results of adjacent odd-even elements
are added together and stored to the out vector
(2 signed word results)
The multiplication results of adjacent odd-even elements
are added together and written to the 'out0' vector
*/
#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
......@@ -750,16 +728,14 @@
/* DOTP_SH4_SW: variadic alias that forwards its arguments to DOTP_SH4 with
   v4i32 prepended as the leading argument.
   NOTE(review): presumably DOTP_SH4's RTYPE, as in DOTP_SH2 - confirm. */
#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
/* Description : Dot product of word vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - signed word
Details : Signed word elements from mult0 are multiplied with
signed word elements from cnst0 producing a result
Return Type - as per RTYPE
Details : Signed word elements from 'mult0' are multiplied with
signed word elements from 'cnst0' producing a result
twice the size of input i.e. signed double word.
Then this multiplication results of adjacent odd-even elements
are added together and stored to the out vector
(2 signed double word results)
The multiplication results of adjacent odd-even elements
are added together and written to the 'out0' vector
*/
#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \
......@@ -768,16 +744,14 @@
/* DOTP_SW2_SD: variadic alias for DOTP_SW2 with its RTYPE argument fixed to
   v2i64; the remaining arguments are forwarded unchanged. */
#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
/* Description : Dot product & addition of byte vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - signed halfword
Details : Signed byte elements from mult0 are multiplied with
signed byte elements from cnst0 producing a result
Return Type - as per RTYPE
Details : Signed byte elements from 'mult0' are multiplied with
signed byte elements from 'cnst0' producing a result
twice the size of input i.e. signed halfword.
Then this multiplication results of adjacent odd-even elements
are added to the out vector
(2 signed halfword results)
The multiplication results of adjacent odd-even elements
are added to the 'out0' vector
*/
#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \
......@@ -793,8 +767,7 @@
/* DPADD_SB4_SH: variadic alias that forwards its arguments to DPADD_SB4 with
   v8i16 prepended as the leading argument.
   NOTE(review): presumably DPADD_SB4's RTYPE, as in DPADD_SB2 - confirm. */
#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
/* Description : Dot product & addition of halfword vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Signed halfword elements from 'mult0' are multiplied with
......@@ -828,10 +801,10 @@
/* Description : Minimum values between unsigned elements of
either vector are copied to the output vector
Arguments : Inputs - in0, in1, min_vec
Outputs - in0, in1, (in place)
Return Type - unsigned halfword
Outputs - in place operation
Return Type - as per RTYPE
Details : Minimum of unsigned halfword element values from 'in0' and
'min_value' are written to output vector 'in0'
'min_vec' are written to output vector 'in0'
*/
#define MIN_UH2(RTYPE, in0, in1, min_vec) { \
in0 = (RTYPE)__msa_min_u_h((v8u16)in0, min_vec); \
......@@ -847,8 +820,8 @@
/* Description : Clips all signed halfword elements of input vector
between 0 & 255
Arguments : Inputs - in (input vector)
Outputs - out_m (output vector with clipped elements)
Arguments : Input - in
Output - out_m
Return Type - signed halfword
*/
#define CLIP_SH_0_255(in) ({ \
......@@ -868,12 +841,12 @@
CLIP_SH2_0_255(in2, in3); \
}
/* Description : Addition of 4 signed word elements
4 signed word elements of input vector are added together and
/* Description : Horizontal addition of 4 signed word elements of input vector
Arguments : Input - in (signed word vector)
Output - sum_m (i32 sum)
Return Type - signed word (GP)
Details : 4 signed word elements of 'in' vector are added together and
the resulting integer sum is returned
Arguments : Inputs - in (signed word vector)
Outputs - sum_m (i32 sum)
Return Type - signed word
*/
#define HADD_SW_S32(in) ({ \
v2i64 res0_m, res1_m; \
......@@ -892,7 +865,7 @@
Return Type - as per RTYPE
Details : Each unsigned odd byte element from 'in0' is added to
even unsigned byte element from 'in0' (pairwise) and the
halfword result is stored in 'out0'
halfword result is written to 'out0'
*/
#define HADD_UB2(RTYPE, in0, in1, out0, out1) { \
out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
......@@ -934,11 +907,11 @@
}
/* HSUB_UH2_SW: variadic alias that forwards its arguments to HSUB_UH2 with
   v4i32 prepended as the leading argument.
   NOTE(review): presumably HSUB_UH2's RTYPE - HSUB_UH2 is not shown here,
   confirm. */
#define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__)
/* Description : Insert specified word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
Outputs - out (output vector)
/* Description : Set element n of the input vector to a GPR value
Arguments : Inputs - in0, in1, in2, in3
Output - out
Return Type - as per RTYPE
Details : Set element 0 in vector 'out' to value specified in 'in0'
*/
#define INSERT_W2(RTYPE, in0, in1, out) { \
out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
......@@ -955,12 +928,6 @@
/* INSERT_W4_UB / INSERT_W4_SB: variadic aliases that forward their arguments
   to INSERT_W4 with v16u8 / v16i8 respectively prepended as the leading
   argument.
   NOTE(review): presumably INSERT_W4's RTYPE, as in INSERT_W2 - confirm. */
#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
/* Description : Insert specified double word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1 (2 input vectors)
Outputs - out (output vector)
Return Type - as per RTYPE
*/
#define INSERT_D2(RTYPE, in0, in1, out) { \
out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
......@@ -972,10 +939,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Even byte elements of 'in0' and even byte
elements of 'in1' are interleaved and copied to 'out0'
Even byte elements of 'in2' and even byte
elements of 'in3' are interleaved and copied to 'out1'
Details : Even byte elements of 'in0' and 'in1' are interleaved
and written to 'out0'
*/
#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
......@@ -988,10 +953,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Even halfword elements of 'in0' and even halfword
elements of 'in1' are interleaved and copied to 'out0'
Even halfword elements of 'in2' and even halfword
elements of 'in3' are interleaved and copied to 'out1'
Details : Even halfword elements of 'in0' and 'in1' are interleaved
and written to 'out0'
*/
#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
......@@ -1018,10 +981,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Even double word elements of 'in0' and even double word
elements of 'in1' are interleaved and copied to 'out0'
Even double word elements of 'in2' and even double word
elements of 'in3' are interleaved and copied to 'out1'
Details : Even double word elements of 'in0' and 'in1' are interleaved
and written to 'out0'
*/
#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \
......@@ -1033,10 +994,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Left half of byte elements of in0 and left half of byte
elements of in1 are interleaved and copied to out0.
Left half of byte elements of in2 and left half of byte
elements of in3 are interleaved and copied to out1.
Details : Left half of byte elements of 'in0' and 'in1' are interleaved
and written to 'out0'.
*/
#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
......@@ -1059,10 +1018,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Left half of halfword elements of in0 and left half of halfword
elements of in1 are interleaved and copied to out0.
Left half of halfword elements of in2 and left half of halfword
elements of in3 are interleaved and copied to out1.
Details : Left half of halfword elements of 'in0' and 'in1' are
interleaved and written to 'out0'.
*/
#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
......@@ -1074,10 +1031,8 @@
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Left half of word elements of in0 and left half of word
elements of in1 are interleaved and copied to out0.
Left half of word elements of in2 and left half of word
elements of in3 are interleaved and copied to out1.
Details : Left half of word elements of 'in0' and 'in1' are interleaved
and written to 'out0'.
*/
#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
......@@ -1087,14 +1042,11 @@
/* ILVL_W2_SH: variadic alias for ILVL_W2 with its RTYPE argument fixed to
   v8i16; the remaining arguments are forwarded unchanged. */
#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
/* Description : Interleave right half of byte elements from vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
Outputs - out0, out1, out2, out3
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
Return Type - as per RTYPE
Details : Right half of byte elements of in0 and right half of byte
elements of in1 are interleaved and copied to out0.
Right half of byte elements of in2 and right half of byte
elements of in3 are interleaved and copied to out1.
Similar for other pairs
Details : Right half of byte elements of 'in0' and 'in1' are interleaved
and written to 'out0'.
*/
#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
......@@ -1126,14 +1078,11 @@
/* ILVR_B8_UH: variadic alias that forwards its arguments to ILVR_B8 with
   v8u16 prepended as the leading argument.
   NOTE(review): presumably ILVR_B8's RTYPE, as in ILVR_B2 - confirm. */
#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
/* Description : Interleave right half of halfword elements from vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
Outputs - out0, out1, out2, out3
Return Type - signed halfword
Details : Right half of halfword elements of in0 and right half of